From 38fffa630ee80163dc65e759392ad29798905679 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Wed, 6 Nov 2024 11:53:33 +0000 Subject: [PATCH] [LLVM][IR] Use splat syntax when printing Constant[Data]Vector. (#112548) --- clang/test/CodeGen/PowerPC/altivec.c | 14 +- .../CodeGen/PowerPC/builtins-ppc-altivec.c | 102 +- .../CodeGen/PowerPC/builtins-ppc-fastmath.c | 4 +- .../CodeGen/PowerPC/builtins-ppc-p10vector.c | 20 +- .../CodeGen/PowerPC/builtins-ppc-p8vector.c | 208 +-- .../CodeGen/PowerPC/builtins-ppc-quadword.c | 2 +- clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c | 130 +- .../CodeGen/PowerPC/builtins-ppc-xlcompat.c | 16 +- clang/test/CodeGen/PowerPC/ppc-emmintrin.c | 32 +- clang/test/CodeGen/PowerPC/ppc-xmmintrin.c | 16 +- ...r-bool-pixel-altivec-init-no-parentheses.c | 10 +- .../PowerPC/vector-bool-pixel-altivec-init.c | 10 +- .../test/CodeGen/RISCV/rvv-vls-bitwise-ops.c | 16 +- .../builtins-systemz-zvector-constrained.c | 16 +- .../SystemZ/builtins-systemz-zvector.c | 74 +- clang/test/CodeGen/SystemZ/zvector.c | 160 +- clang/test/CodeGen/SystemZ/zvector2.c | 8 +- clang/test/CodeGen/X86/avx-builtins.c | 4 +- clang/test/CodeGen/X86/avx10_2bf16-builtins.c | 2 +- clang/test/CodeGen/X86/avx2-builtins.c | 12 +- clang/test/CodeGen/X86/avx512bw-builtins.c | 12 +- clang/test/CodeGen/X86/avx512dq-builtins.c | 18 +- clang/test/CodeGen/X86/avx512f-builtins.c | 62 +- clang/test/CodeGen/X86/avx512vbmi2-builtins.c | 36 +- clang/test/CodeGen/X86/avx512vl-builtins.c | 72 +- clang/test/CodeGen/X86/avx512vldq-builtins.c | 16 +- .../test/CodeGen/X86/avx512vlvbmi2-builtins.c | 72 +- clang/test/CodeGen/X86/mmx-builtins.c | 6 +- clang/test/CodeGen/X86/sse-builtins.c | 2 +- clang/test/CodeGen/X86/sse2-builtins.c | 8 +- clang/test/CodeGen/X86/sse41-builtins.c | 8 +- clang/test/CodeGen/X86/xop-builtins-cmp.c | 16 +- clang/test/CodeGen/X86/xop-builtins.c | 12 +- clang/test/CodeGen/aarch64-neon-3v.c | 64 +- clang/test/CodeGen/aarch64-neon-intrinsics.c | 324 ++-- clang/test/CodeGen/aarch64-neon-misc.c | 52 +- clang/test/CodeGen/aarch64-neon-shifts.c | 8 +- clang/test/CodeGen/aarch64-neon-tbl.c | 24 +- clang/test/CodeGen/aarch64-poly64.c | 4 +- .../CodeGen/aarch64-sve-vls-bitwise-ops.c | 18 +- .../aarch64-v8.2a-neon-intrinsics-generic.c | 4 +- .../CodeGen/arm-bf16-convert-intrinsics.c | 18 +- .../test/CodeGen/arm-mve-intrinsics/absneg.c | 36 +- .../CodeGen/arm-mve-intrinsics/bitwise-imm.c | 56 +- .../CodeGen/arm-mve-intrinsics/cplusplus.cpp | 8 +- clang/test/CodeGen/arm-mve-intrinsics/vbicq.c | 8 +- .../arm-mve-intrinsics/vector-shift-imm.c | 30 +- clang/test/CodeGen/arm-mve-intrinsics/vornq.c | 8 +- clang/test/CodeGen/arm-neon-shifts.c | 8 +- clang/test/CodeGen/arm_neon_intrinsics.c | 476 +++--- .../test/CodeGen/builtins-elementwise-math.c | 4 +- clang/test/CodeGen/builtins-nvptx.c | 12 +- clang/test/CodeGen/builtinshufflevector2.c | 2 +- clang/test/CodeGen/const-init.c | 2 +- clang/test/CodeGen/matrix-type-operators.c | 10 +- clang/test/CodeGen/neon-immediate-ubsan.c | 2 +- clang/test/CodeGen/nofpclass.c | 8 +- clang/test/CodeGen/ppc-vec_ct-truncate.c | 16 +- clang/test/CodeGen/variadic-nvptx.c | 2 +- clang/test/CodeGen/vecshift.c | 14 +- clang/test/CodeGen/vector-scalar.c | 10 +- clang/test/CodeGenCXX/auto-var-init.cpp | 20 +- clang/test/CodeGenCXX/ext-int.cpp | 24 +- .../ext-vector-type-conditional.cpp | 38 +- .../test/CodeGenCXX/matrix-type-builtins.cpp | 2 +- .../test/CodeGenCXX/matrix-type-operators.cpp | 2 +- .../CodeGenCXX/vector-size-conditional.cpp | 10 +- .../CodeGenCXX/vector-splat-conversion.cpp | 12 +- .../standard_conversion_sequences.hlsl | 22 +- .../CodeGenHLSL/builtins/ScalarSwizzles.hlsl | 18 +- clang/test/CodeGenHLSL/builtins/rcp.hlsl | 24 +- clang/test/CodeGenHLSL/builtins/sign.hlsl | 18 +- clang/test/CodeGenOpenCL/bool_cast.cl | 4 +- clang/test/CodeGenOpenCL/logical-ops.cl | 16 +- .../test/CodeGenOpenCL/partial_initializer.cl | 4 +- clang/test/CodeGenOpenCL/shifts.cl | 6 +- clang/test/CodeGenOpenCL/vector_literals.cl | 4 +- .../Headers/__clang_hip_math_deprecated.hip | 2 +- clang/test/Headers/wasm.c | 18 +- llvm/lib/IR/AsmWriter.cpp | 17 + .../Analysis/CostModel/AArch64/arith-fp.ll | 14 +- .../CostModel/AArch64/arith-widening.ll | 6 +- llvm/test/Analysis/CostModel/AArch64/div.ll | 144 +- .../Analysis/CostModel/AArch64/div_cte.ll | 12 +- llvm/test/Analysis/CostModel/AArch64/fshl.ll | 8 +- llvm/test/Analysis/CostModel/AArch64/fshr.ll | 8 +- .../Analysis/CostModel/AArch64/logicalop.ll | 4 +- .../CostModel/AArch64/mem-op-cost-model.ll | 32 +- llvm/test/Analysis/CostModel/AArch64/rem.ll | 144 +- llvm/test/Analysis/CostModel/AMDGPU/div.ll | 432 +++--- llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll | 384 ++--- llvm/test/Analysis/CostModel/AMDGPU/fneg.ll | 24 +- .../Analysis/CostModel/AMDGPU/logicalop.ll | 8 +- llvm/test/Analysis/CostModel/AMDGPU/mul.ll | 192 +-- llvm/test/Analysis/CostModel/AMDGPU/rem.ll | 432 +++--- llvm/test/Analysis/CostModel/ARM/divrem.ll | 820 +++++----- llvm/test/Analysis/CostModel/ARM/logicalop.ll | 16 +- .../Analysis/CostModel/PowerPC/logicalop.ll | 2 +- .../Analysis/CostModel/RISCV/arith-int.ll | 12 +- .../Analysis/CostModel/RISCV/logicalop.ll | 4 +- .../CostModel/RISCV/rvv-load-store.ll | 12 +- .../Analysis/CostModel/RISCV/rvv-phi-const.ll | 28 +- .../Analysis/CostModel/RISCV/rvv-select.ll | 2 +- .../Analysis/CostModel/SystemZ/divrem-pow2.ll | 72 +- .../Analysis/CostModel/SystemZ/logicalop.ll | 4 +- .../CostModel/X86/arith-fp-codesize.ll | 48 +- .../CostModel/X86/arith-fp-latency.ll | 96 +- .../CostModel/X86/arith-fp-sizelatency.ll | 84 +- llvm/test/Analysis/CostModel/X86/arith-fp.ll | 96 +- .../Analysis/CostModel/X86/div-codesize.ll | 336 ++-- .../Analysis/CostModel/X86/div-latency.ll | 384 ++--- .../Analysis/CostModel/X86/div-sizelatency.ll | 336 ++-- llvm/test/Analysis/CostModel/X86/div.ll | 768 ++++----- .../Analysis/CostModel/X86/fshl-codesize.ll | 504 +++--- .../Analysis/CostModel/X86/fshl-latency.ll | 486 +++--- .../CostModel/X86/fshl-sizelatency.ll | 552 +++---- llvm/test/Analysis/CostModel/X86/fshl.ll | 486 +++--- .../Analysis/CostModel/X86/fshr-codesize.ll | 504 +++--- .../Analysis/CostModel/X86/fshr-latency.ll | 486 +++--- .../CostModel/X86/fshr-sizelatency.ll | 552 +++---- llvm/test/Analysis/CostModel/X86/fshr.ll | 486 +++--- llvm/test/Analysis/CostModel/X86/logicalop.ll | 4 +- .../X86/masked-intrinsic-codesize.ll | 52 +- .../X86/masked-intrinsic-cost-inseltpoison.ll | 52 +- .../CostModel/X86/masked-intrinsic-cost.ll | 52 +- .../CostModel/X86/masked-intrinsic-latency.ll | 52 +- .../X86/masked-intrinsic-sizelatency.ll | 52 +- .../Analysis/CostModel/X86/mul-codesize.ll | 384 ++--- .../Analysis/CostModel/X86/mul-latency.ll | 384 ++--- .../Analysis/CostModel/X86/mul-sizelatency.ll | 384 ++--- llvm/test/Analysis/CostModel/X86/mul.ll | 384 ++--- .../Analysis/CostModel/X86/rem-codesize.ll | 360 ++--- .../Analysis/CostModel/X86/rem-latency.ll | 336 ++-- .../Analysis/CostModel/X86/rem-sizelatency.ll | 336 ++-- llvm/test/Analysis/CostModel/X86/rem.ll | 768 ++++----- .../Analysis/CostModel/X86/slm-arith-costs.ll | 8 +- llvm/test/Analysis/CostModel/X86/vdiv-cost.ll | 42 +- .../CostModel/X86/vshift-ashr-codesize.ll | 154 +- .../X86/vshift-ashr-cost-inseltpoison.ll | 160 +- .../CostModel/X86/vshift-ashr-cost.ll | 160 +- .../CostModel/X86/vshift-ashr-latency.ll | 184 +-- .../CostModel/X86/vshift-ashr-sizelatency.ll | 158 +- .../CostModel/X86/vshift-lshr-codesize.ll | 150 +- .../X86/vshift-lshr-cost-inseltpoison.ll | 144 +- .../CostModel/X86/vshift-lshr-cost.ll | 144 +- .../CostModel/X86/vshift-lshr-latency.ll | 164 +- .../CostModel/X86/vshift-lshr-sizelatency.ll | 150 +- .../CostModel/X86/vshift-shl-codesize.ll | 150 +- .../X86/vshift-shl-cost-inseltpoison.ll | 144 +- .../Analysis/CostModel/X86/vshift-shl-cost.ll | 144 +- .../CostModel/X86/vshift-shl-latency.ll | 164 +- .../CostModel/X86/vshift-shl-sizelatency.ll | 132 +- .../DemandedBits/vectors-inseltpoison.ll | 22 +- llvm/test/Analysis/DemandedBits/vectors.ll | 22 +- .../test/Analysis/ValueTracking/known-bits.ll | 2 +- .../Analysis/ValueTracking/known-fpclass.ll | 2 +- .../Analysis/ValueTracking/known-non-zero.ll | 16 +- .../knownbits-and-or-xor-lowbit.ll | 14 +- .../ValueTracking/knownbits-bmi-pattern.ll | 52 +- .../ValueTracking/knownbits-x86-hadd-hsub.ll | 16 +- .../Analysis/ValueTracking/knownzero-shift.ll | 8 +- .../Analysis/ValueTracking/numsignbits-shl.ll | 12 +- llvm/test/Assembler/ConstantExprFold.ll | 4 +- llvm/test/Assembler/constant-splat.ll | 24 +- llvm/test/Assembler/opaque-ptr.ll | 2 +- llvm/test/Bitcode/constantsTest.3.2.ll | 10 +- .../AArch64/arm64-codegen-prepare-extload.ll | 2 +- .../AMDGPU/amdgpu-codegenprepare-fdiv.ll | 18 +- .../amdgpu-codegenprepare-i16-to-i32.ll | 4 +- .../AMDGPU/amdgpu-codegenprepare-idiv.ll | 16 +- .../AMDGPU/amdgpu-codegenprepare-mul24.ll | 104 +- .../AMDGPU/amdgpu-late-codegenprepare.ll | 4 +- .../AMDGPU/amdgpu-simplify-libcall-pow.ll | 62 +- .../AMDGPU/amdgpu-simplify-libcall-pown.ll | 34 +- .../AMDGPU/amdgpu-simplify-libcall-powr.ll | 34 +- .../AMDGPU/amdgpu-simplify-libcall-rootn.ll | 34 +- llvm/test/CodeGen/AMDGPU/fract-match.ll | 28 +- .../lower-buffer-fat-pointers-p7-in-memory.ll | 4 +- .../lower-buffer-fat-pointers-pointer-ops.ll | 16 +- .../AMDGPU/promote-alloca-array-aggregate.ll | 2 +- .../CodeGen/AMDGPU/promote-alloca-memset.ll | 2 +- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll | 2 +- llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll | 6 +- llvm/test/CodeGen/ARM/vector-promotion.ll | 14 +- .../Hexagon/autohvx/vector-align-tbaa.ll | 18 +- llvm/test/CodeGen/NVPTX/variadics-lowering.ll | 2 +- ...xed-vectors-strided-load-store-negative.ll | 22 +- .../rvv/fixed-vectors-strided-load-store.ll | 138 +- .../LowOverheadLoops/tail-pred-const.ll | 186 ++- .../Thumb2/mve-gather-optimisation-deep.ll | 32 +- .../CodeGen/X86/codegen-prepare-extload.ll | 2 +- .../MemorySanitizer/AArch64/arm64-vshift.ll | 1378 ++++++++--------- .../MemorySanitizer/AArch64/neon_tbl.ll | 6 +- .../MemorySanitizer/AArch64/neon_vst.ll | 242 +-- .../MemorySanitizer/X86/avx-intrinsics-x86.ll | 74 +- .../X86/avx2-intrinsics-x86.ll | 78 +- .../X86/sse2-intrinsics-x86.ll | 102 +- .../X86/sse41-intrinsics-x86.ll | 28 +- .../i386/avx-intrinsics-x86.ll | 12 +- .../i386/avx2-intrinsics-x86.ll | 10 +- .../i386/sse2-intrinsics-x86.ll | 2 +- .../i386/sse41-intrinsics-x86.ll | 12 +- .../MemorySanitizer/masked-store-load.ll | 36 +- .../MemorySanitizer/msan_basic.ll | 54 +- .../Instrumentation/MemorySanitizer/reduce.ll | 2 +- .../vector-track-origins-neon.ll | 24 +- .../MemorySanitizer/vector_arith.ll | 2 +- .../NumericalStabilitySanitizer/basic.ll | 12 +- .../AggressiveInstCombine/ARM/fptosisat.ll | 20 +- .../AggressiveInstCombine/X86/fptosisat.ll | 4 +- .../AggressiveInstCombine/masked-cmp.ll | 10 +- .../AggressiveInstCombine/popcount.ll | 30 +- .../AggressiveInstCombine/trunc_multi_uses.ll | 8 +- .../AggressiveInstCombine/vector-or-load.ll | 2 +- .../Transforms/Attributor/nofpclass-powi.ll | 4 +- llvm/test/Transforms/Attributor/nofpclass.ll | 8 +- .../value-simplify-pointer-info-vec.ll | 4 +- llvm/test/Transforms/BDCE/binops-multiuse.ll | 4 +- llvm/test/Transforms/BDCE/dead-uses.ll | 8 +- .../Transforms/BDCE/vectors-inseltpoison.ll | 30 +- llvm/test/Transforms/BDCE/vectors.ll | 30 +- .../CodeGenPrepare/X86/fold-loop-of-urem.ll | 4 +- .../X86/gather-scatter-opt-inseltpoison.ll | 12 +- .../CodeGenPrepare/X86/gather-scatter-opt.ll | 12 +- .../AArch64/large-immediate.ll | 16 +- .../ConstraintElimination/geps-ptrvector.ll | 2 +- .../ConstraintElimination/vector-compares.ll | 2 +- .../CorrelatedValuePropagation/icmp.ll | 6 +- .../CorrelatedValuePropagation/overflows.ll | 4 +- .../CorrelatedValuePropagation/vectors.ll | 32 +- .../X86/gather-null-pointer.ll | 6 +- .../DeadStoreElimination/masked-dead-store.ll | 8 +- .../offsetted-overlapping-stores.ll | 2 +- .../DivRemPairs/AMDGPU/div-rem-pairs.ll | 4 +- llvm/test/Transforms/EarlyCSE/commute.ll | 10 +- llvm/test/Transforms/EarlyCSE/gep.ll | 2 +- .../GVN/non-integral-pointers-inseltpoison.ll | 4 +- .../Transforms/GVN/non-integral-pointers.ll | 2 +- .../AMDGPU/infer-getelementptr.ll | 2 +- .../masked-gather-scatter.ll | 4 +- .../InstCombine/2007-03-21-SignedRangeTest.ll | 4 +- .../InstCombine/2008-01-21-MulTrunc.ll | 4 +- .../InstCombine/2008-07-11-RemAnd.ll | 2 +- .../InstCombine/2008-12-17-SRemNegConstVec.ll | 2 +- .../AArch64/2012-04-23-Neon-Intrinsics.ll | 10 +- .../InstCombine/AArch64/aes-intrinsics.ll | 4 +- .../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 2 +- .../ARM/2012-04-23-Neon-Intrinsics.ll | 10 +- .../InstCombine/ARM/aes-intrinsics.ll | 4 +- .../Transforms/InstCombine/ARM/mve-v2i2v.ll | 16 +- .../InstCombine/X86/x86-masked-memops.ll | 4 +- .../Transforms/InstCombine/X86/x86-movmsk.ll | 2 +- .../InstCombine/X86/x86-muldq-inseltpoison.ll | 32 +- .../Transforms/InstCombine/X86/x86-muldq.ll | 32 +- .../InstCombine/X86/x86-pack-inseltpoison.ll | 48 +- .../Transforms/InstCombine/X86/x86-pack.ll | 48 +- .../Transforms/InstCombine/X86/x86-pmulh.ll | 12 +- .../Transforms/InstCombine/X86/x86-pmulhrs.ll | 12 +- .../Transforms/InstCombine/X86/x86-ternlog.ll | 30 +- .../X86/x86-vector-shifts-inseltpoison.ll | 158 +- .../InstCombine/X86/x86-vector-shifts.ll | 158 +- .../InstCombine/X86/x86-xop-inseltpoison.ll | 4 +- .../Transforms/InstCombine/X86/x86-xop.ll | 4 +- llvm/test/Transforms/InstCombine/abs-1.ll | 2 +- .../Transforms/InstCombine/abs-intrinsic.ll | 14 +- .../Transforms/InstCombine/add-mask-neg.ll | 10 +- llvm/test/Transforms/InstCombine/add-mask.ll | 2 +- .../InstCombine/add-shl-sdiv-to-srem.ll | 18 +- .../test/Transforms/InstCombine/add-sitofp.ll | 8 +- llvm/test/Transforms/InstCombine/add.ll | 66 +- llvm/test/Transforms/InstCombine/add4.ll | 4 +- .../test/Transforms/InstCombine/add_or_sub.ll | 4 +- .../InstCombine/addsub-constant-folding.ll | 54 +- .../InstCombine/adjust-for-minmax.ll | 20 +- .../Transforms/InstCombine/and-compare.ll | 6 +- llvm/test/Transforms/InstCombine/and-fcmp.ll | 2 +- .../InstCombine/and-or-icmp-const-icmp.ll | 18 +- .../Transforms/InstCombine/and-or-icmps.ll | 26 +- .../test/Transforms/InstCombine/and-or-not.ll | 2 +- llvm/test/Transforms/InstCombine/and-or.ll | 40 +- .../test/Transforms/InstCombine/and-xor-or.ll | 52 +- llvm/test/Transforms/InstCombine/and.ll | 90 +- llvm/test/Transforms/InstCombine/and2.ll | 8 +- llvm/test/Transforms/InstCombine/apint-add.ll | 2 +- .../test/Transforms/InstCombine/apint-mul1.ll | 2 +- .../test/Transforms/InstCombine/apint-mul2.ll | 2 +- .../Transforms/InstCombine/apint-select.ll | 6 +- .../Transforms/InstCombine/apint-shift.ll | 44 +- .../Transforms/InstCombine/ashr-demand.ll | 8 +- llvm/test/Transforms/InstCombine/ashr-lshr.ll | 2 +- llvm/test/Transforms/InstCombine/avg-lsb.ll | 2 +- .../InstCombine/binop-and-shifts.ll | 28 +- .../test/Transforms/InstCombine/binop-cast.ll | 4 +- .../InstCombine/binop-of-displaced-shifts.ll | 2 +- .../binop-select-cast-of-select-cond.ll | 6 +- .../Transforms/InstCombine/binop-select.ll | 6 +- .../test/Transforms/InstCombine/bit-checks.ll | 10 +- llvm/test/Transforms/InstCombine/bit_ceil.ll | 6 +- llvm/test/Transforms/InstCombine/bit_floor.ll | 6 +- .../InstCombine/bitcast-inseltpoison.ll | 6 +- llvm/test/Transforms/InstCombine/bitcast.ll | 8 +- .../test/Transforms/InstCombine/bitreverse.ll | 2 +- .../test/Transforms/InstCombine/bswap-fold.ll | 26 +- llvm/test/Transforms/InstCombine/bswap.ll | 4 +- .../canonicalize-ashr-shl-to-masking.ll | 28 +- ...etween-negative-and-positive-thresholds.ll | 8 +- ...ern-between-zero-and-positive-threshold.ll | 8 +- ...nt-low-bit-mask-and-icmp-eq-to-icmp-ule.ll | 4 +- ...nt-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll | 2 +- ...t-low-bit-mask-and-icmp-sge-to-icmp-sle.ll | 4 +- ...t-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll | 2 +- ...t-low-bit-mask-and-icmp-sle-to-icmp-sle.ll | 4 +- ...t-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll | 2 +- ...t-low-bit-mask-and-icmp-uge-to-icmp-ule.ll | 4 +- ...t-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll | 2 +- ...t-low-bit-mask-and-icmp-ule-to-icmp-ule.ll | 4 +- ...t-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll | 2 +- .../InstCombine/canonicalize-fcmp-inf.ll | 2 +- ...icalize-lack-of-signed-truncation-check.ll | 10 +- ...ze-low-bit-mask-and-icmp-eq-to-icmp-ule.ll | 2 +- ...ze-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll | 2 +- ...low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll | 4 +- ...low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll | 4 +- ...low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll | 6 +- ...low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll | 6 +- .../canonicalize-lshr-shl-to-masking.ll | 28 +- .../canonicalize-shl-lshr-to-masking.ll | 28 +- .../canonicalize-signed-truncation-check.ll | 10 +- .../Transforms/InstCombine/canonicalize.ll | 2 +- .../InstCombine/cast-int-fcmp-eq-0.ll | 2 +- .../InstCombine/cast-int-icmp-eq-0.ll | 6 +- llvm/test/Transforms/InstCombine/cast.ll | 62 +- .../Transforms/InstCombine/clamp-to-minmax.ll | 4 +- .../Transforms/InstCombine/cmp-intrinsic.ll | 36 +- .../combine-is.fpclass-and-fcmp.ll | 2 +- .../Transforms/InstCombine/compare-signs.ll | 18 +- .../Transforms/InstCombine/compare-udiv.ll | 20 +- .../InstCombine/consecutive-ptrmask.ll | 2 +- .../InstCombine/copysign-fneg-fabs.ll | 2 +- .../create-class-from-logic-fcmp.ll | 4 +- .../InstCombine/ctlz-cttz-shifts.ll | 6 +- .../test/Transforms/InstCombine/ctpop-cttz.ll | 2 +- .../test/Transforms/InstCombine/ctpop-pow2.ll | 6 +- llvm/test/Transforms/InstCombine/ctpop.ll | 24 +- llvm/test/Transforms/InstCombine/demorgan.ll | 4 +- .../Transforms/InstCombine/dependent-ivs.ll | 6 +- llvm/test/Transforms/InstCombine/div-shift.ll | 8 +- llvm/test/Transforms/InstCombine/div.ll | 60 +- .../Transforms/InstCombine/eq-of-parts.ll | 8 +- llvm/test/Transforms/InstCombine/exact.ll | 22 +- llvm/test/Transforms/InstCombine/exp2-1.ll | 8 +- .../Transforms/InstCombine/exp2-to-ldexp.ll | 2 +- .../Transforms/InstCombine/fabs-as-int.ll | 2 +- .../Transforms/InstCombine/fabs-copysign.ll | 4 +- .../Transforms/InstCombine/fabs-fneg-fold.ll | 2 +- llvm/test/Transforms/InstCombine/fadd.ll | 2 +- llvm/test/Transforms/InstCombine/fast-math.ll | 2 +- .../InstCombine/fcmp-range-check-idiom.ll | 2 +- llvm/test/Transforms/InstCombine/fcmp.ll | 8 +- llvm/test/Transforms/InstCombine/fdiv.ll | 18 +- llvm/test/Transforms/InstCombine/fma.ll | 34 +- .../InstCombine/fmul-inseltpoison.ll | 2 +- llvm/test/Transforms/InstCombine/fmul-sqrt.ll | 2 +- llvm/test/Transforms/InstCombine/fmul.ll | 12 +- .../Transforms/InstCombine/fneg-as-int.ll | 2 +- .../InstCombine/fneg-fabs-as-int.ll | 2 +- .../InstCombine/fold-bin-operand.ll | 2 +- .../InstCombine/fold-ctpop-of-not.ll | 4 +- .../InstCombine/fold-select-fmul-if-zero.ll | 2 +- .../InstCombine/fold-select-trunc.ll | 2 +- .../InstCombine/fold-signbit-test-power2.ll | 16 +- .../fold-sub-of-not-to-inc-of-add.ll | 6 +- .../InstCombine/fpclass-check-idioms.ll | 4 +- llvm/test/Transforms/InstCombine/fsh.ll | 24 +- llvm/test/Transforms/InstCombine/funnel.ll | 16 +- .../InstCombine/gep-combine-loop-invariant.ll | 10 +- .../Transforms/InstCombine/gep-custom-dl.ll | 4 +- .../test/Transforms/InstCombine/gep-vector.ll | 4 +- .../get-lowbitmask-upto-and-including-bit.ll | 16 +- .../Transforms/InstCombine/getelementptr.ll | 6 +- .../high-bit-signmask-with-trunc.ll | 2 +- .../InstCombine/high-bit-signmask.ll | 2 +- ...n-out-of-bias-calculation-with-constant.ll | 2 +- .../hoist-negation-out-of-bias-calculation.ll | 4 +- .../hoist-not-from-ashr-operand.ll | 4 +- ...hoist-xor-by-constant-from-xor-by-value.ll | 2 +- llvm/test/Transforms/InstCombine/icmp-add.ll | 44 +- .../Transforms/InstCombine/icmp-and-shift.ll | 26 +- .../InstCombine/icmp-div-constant.ll | 18 +- llvm/test/Transforms/InstCombine/icmp-fsh.ll | 2 +- .../Transforms/InstCombine/icmp-logical.ll | 24 +- .../Transforms/InstCombine/icmp-mul-and.ll | 6 +- llvm/test/Transforms/InstCombine/icmp-mul.ll | 18 +- .../InstCombine/icmp-not-bool-constant.ll | 20 +- .../Transforms/InstCombine/icmp-of-and-x.ll | 4 +- .../Transforms/InstCombine/icmp-of-or-x.ll | 6 +- .../Transforms/InstCombine/icmp-of-xor-x.ll | 10 +- llvm/test/Transforms/InstCombine/icmp-or.ll | 6 +- .../icmp-power2-and-icmp-shifted-mask.ll | 28 +- .../test/Transforms/InstCombine/icmp-range.ll | 30 +- .../Transforms/InstCombine/icmp-rotate.ll | 4 +- .../Transforms/InstCombine/icmp-select.ll | 2 +- .../InstCombine/icmp-shl-1-overflow.ll | 12 +- .../Transforms/InstCombine/icmp-shl-nsw.ll | 2 +- .../Transforms/InstCombine/icmp-shl-nuw.ll | 8 +- llvm/test/Transforms/InstCombine/icmp-shl.ll | 8 +- .../Transforms/InstCombine/icmp-shr-lt-gt.ll | 2 +- llvm/test/Transforms/InstCombine/icmp-shr.ll | 6 +- .../Transforms/InstCombine/icmp-signmask.ll | 2 +- llvm/test/Transforms/InstCombine/icmp-sub.ll | 10 +- .../test/Transforms/InstCombine/icmp-trunc.ll | 22 +- .../Transforms/InstCombine/icmp-uadd-sat.ll | 92 +- ...al-to-icmp-eq-of-lshr-val-by-bits-and-0.ll | 10 +- ...al-to-icmp-eq-of-lshr-val-by-bits-and-0.ll | 4 +- ...al-to-icmp-ne-of-lshr-val-by-bits-and-0.ll | 10 +- ...al-to-icmp-ne-of-lshr-val-by-bits-and-0.ll | 4 +- .../Transforms/InstCombine/icmp-usub-sat.ll | 20 +- .../InstCombine/icmp-vec-inseltpoison.ll | 20 +- llvm/test/Transforms/InstCombine/icmp-vec.ll | 24 +- .../InstCombine/icmp-with-selects.ll | 2 +- .../InstCombine/icmp-xor-signbit.ll | 10 +- llvm/test/Transforms/InstCombine/icmp.ll | 226 +-- .../insert-extract-shuffle-inseltpoison.ll | 2 +- .../InstCombine/insert-extract-shuffle.ll | 2 +- .../Transforms/InstCombine/insertelement.ll | 8 +- .../integer-round-up-pow2-alignment.ll | 40 +- .../InstCombine/intrinsic-select.ll | 2 +- .../test/Transforms/InstCombine/intrinsics.ll | 16 +- ...rt-variable-mask-in-masked-merge-vector.ll | 24 +- .../test/Transforms/InstCombine/is_fpclass.ll | 8 +- llvm/test/Transforms/InstCombine/ispow2.ll | 32 +- .../test/Transforms/InstCombine/known-bits.ll | 8 +- llvm/test/Transforms/InstCombine/ldexp-ext.ll | 4 +- llvm/test/Transforms/InstCombine/ldexp.ll | 2 +- .../InstCombine/load-store-forward.ll | 2 +- .../load-store-masked-constant-array.ll | 4 +- llvm/test/Transforms/InstCombine/log-pow.ll | 2 +- .../logical-select-inseltpoison.ll | 2 +- .../Transforms/InstCombine/logical-select.ll | 22 +- .../Transforms/InstCombine/low-bit-splat.ll | 2 +- .../InstCombine/lshr-and-negC-icmpeq-zero.ll | 8 +- .../lshr-and-signbit-icmpeq-zero.ll | 8 +- .../lshr-trunc-sext-to-ashr-sext.ll | 8 +- llvm/test/Transforms/InstCombine/lshr.ll | 34 +- .../InstCombine/masked-merge-add.ll | 10 +- .../InstCombine/masked-merge-and-of-ors.ll | 6 +- .../Transforms/InstCombine/masked-merge-or.ll | 10 +- .../InstCombine/masked-merge-xor.ll | 10 +- .../masked_intrinsics-inseltpoison.ll | 2 +- .../InstCombine/masked_intrinsics.ll | 6 +- .../Transforms/InstCombine/max-of-nots.ll | 10 +- llvm/test/Transforms/InstCombine/maximum.ll | 2 +- llvm/test/Transforms/InstCombine/maxnum.ll | 2 +- .../test/Transforms/InstCombine/merge-icmp.ll | 6 +- .../Transforms/InstCombine/min-positive.ll | 2 +- .../Transforms/InstCombine/minmax-fold.ll | 24 +- .../InstCombine/minmax-intrinsics.ll | 22 +- .../Transforms/InstCombine/minmax-of-xor-x.ll | 8 +- llvm/test/Transforms/InstCombine/modulo.ll | 14 +- .../InstCombine/mul-inseltpoison.ll | 26 +- .../Transforms/InstCombine/mul-masked-bits.ll | 12 +- llvm/test/Transforms/InstCombine/mul.ll | 34 +- llvm/test/Transforms/InstCombine/mul_fold.ll | 4 +- .../Transforms/InstCombine/narrow-math.ll | 48 +- llvm/test/Transforms/InstCombine/narrow.ll | 2 +- .../Transforms/InstCombine/negated-bitmask.ll | 16 +- .../Transforms/InstCombine/nested-select.ll | 14 +- llvm/test/Transforms/InstCombine/not.ll | 32 +- .../InstCombine/nsw-inseltpoison.ll | 4 +- llvm/test/Transforms/InstCombine/nsw.ll | 4 +- ...of-two-or-zero-when-comparing-with-zero.ll | 4 +- .../Transforms/InstCombine/onehot_merge.ll | 32 +- .../InstCombine/operand-complexity.ll | 2 +- llvm/test/Transforms/InstCombine/or-concat.ll | 8 +- llvm/test/Transforms/InstCombine/or-xor.ll | 10 +- llvm/test/Transforms/InstCombine/or.ll | 40 +- .../Transforms/InstCombine/overflow-mul.ll | 2 +- ...nput-masking-after-truncation-variant-a.ll | 16 +- ...nput-masking-after-truncation-variant-b.ll | 16 +- ...nput-masking-after-truncation-variant-c.ll | 6 +- ...nput-masking-after-truncation-variant-d.ll | 12 +- ...nput-masking-after-truncation-variant-e.ll | 4 +- ...dant-left-shift-input-masking-variant-a.ll | 16 +- ...dant-left-shift-input-masking-variant-b.ll | 16 +- ...dant-left-shift-input-masking-variant-c.ll | 8 +- ...dant-left-shift-input-masking-variant-d.ll | 14 +- ...dant-left-shift-input-masking-variant-e.ll | 4 +- llvm/test/Transforms/InstCombine/pow-0.ll | 8 +- llvm/test/Transforms/InstCombine/pow-1.ll | 54 +- llvm/test/Transforms/InstCombine/pow-sqrt.ll | 16 +- .../Transforms/InstCombine/pow-to-ldexp.ll | 26 +- .../test/Transforms/InstCombine/pow_fp_int.ll | 2 +- llvm/test/Transforms/InstCombine/pr14365.ll | 6 +- llvm/test/Transforms/InstCombine/pr17827.ll | 8 +- .../InstCombine/pr38984-inseltpoison.ll | 2 +- llvm/test/Transforms/InstCombine/pr38984.ll | 2 +- llvm/test/Transforms/InstCombine/pr53357.ll | 4 +- llvm/test/Transforms/InstCombine/pr98435.ll | 2 +- llvm/test/Transforms/InstCombine/ptrmask.ll | 8 +- ...nput-masking-after-truncation-variant-a.ll | 12 +- ...nput-masking-after-truncation-variant-b.ll | 10 +- ...nput-masking-after-truncation-variant-c.ll | 4 +- ...nput-masking-after-truncation-variant-d.ll | 10 +- ...nput-masking-after-truncation-variant-e.ll | 2 +- ...nput-masking-after-truncation-variant-f.ll | 2 +- ...dant-left-shift-input-masking-variant-a.ll | 10 +- ...dant-left-shift-input-masking-variant-b.ll | 10 +- ...dant-left-shift-input-masking-variant-c.ll | 6 +- ...dant-left-shift-input-masking-variant-d.ll | 12 +- ...dant-left-shift-input-masking-variant-e.ll | 2 +- ...dant-left-shift-input-masking-variant-f.ll | 2 +- .../redundant-right-shift-input-masking.ll | 2 +- .../Transforms/InstCombine/rem-mul-shl.ll | 8 +- llvm/test/Transforms/InstCombine/rem.ll | 22 +- .../reuse-constant-from-select-in-icmp.ll | 30 +- llvm/test/Transforms/InstCombine/rotate.ll | 16 +- .../InstCombine/sadd-with-overflow.ll | 6 +- llvm/test/Transforms/InstCombine/sadd_sat.ll | 14 +- .../InstCombine/saturating-add-sub.ll | 88 +- .../Transforms/InstCombine/scalarization.ll | 2 +- .../InstCombine/sdiv-canonicalize.ll | 4 +- .../sdiv-exact-by-negative-power-of-two.ll | 6 +- .../InstCombine/sdiv-exact-by-power-of-two.ll | 6 +- .../Transforms/InstCombine/select-and-or.ll | 24 +- .../Transforms/InstCombine/select-bitext.ll | 18 +- .../Transforms/InstCombine/select-divrem.ll | 2 +- .../InstCombine/select-extractelement.ll | 2 +- .../InstCombine/select-factorize.ll | 28 +- .../InstCombine/select-icmp-and-zero-shl.ll | 4 +- .../Transforms/InstCombine/select-icmp-and.ll | 40 +- .../InstCombine/select-masked_load.ll | 8 +- .../InstCombine/select-of-bittest.ll | 24 +- .../InstCombine/select-safe-transforms.ll | 24 +- .../InstCombine/select-value-equivalence.ll | 30 +- .../InstCombine/select-with-bitwise-ops.ll | 58 +- llvm/test/Transforms/InstCombine/select.ll | 50 +- .../Transforms/InstCombine/select_meta.ll | 2 +- .../set-lowbits-mask-canonicalize.ll | 16 +- llvm/test/Transforms/InstCombine/set.ll | 22 +- .../InstCombine/sext-of-trunc-nsw.ll | 2 +- llvm/test/Transforms/InstCombine/sext.ll | 8 +- llvm/test/Transforms/InstCombine/shift-add.ll | 16 +- ...ciation-in-bittest-with-truncation-lshr.ll | 16 +- ...ociation-in-bittest-with-truncation-shl.ll | 6 +- .../shift-amount-reassociation-in-bittest.ll | 4 +- ...ount-reassociation-with-truncation-ashr.ll | 2 +- ...ount-reassociation-with-truncation-lshr.ll | 2 +- ...mount-reassociation-with-truncation-shl.ll | 2 +- .../InstCombine/shift-amount-reassociation.ll | 4 +- .../Transforms/InstCombine/shift-logic.ll | 32 +- .../Transforms/InstCombine/shift-shift.ll | 26 +- llvm/test/Transforms/InstCombine/shift-sra.ll | 8 +- llvm/test/Transforms/InstCombine/shift.ll | 86 +- .../InstCombine/shl-and-negC-icmpeq-zero.ll | 8 +- .../shl-and-signbit-icmpeq-zero.ll | 8 +- llvm/test/Transforms/InstCombine/shl-bo.ll | 72 +- .../test/Transforms/InstCombine/shl-demand.ll | 10 +- llvm/test/Transforms/InstCombine/shl-sub.ll | 18 +- .../InstCombine/shl-unsigned-cmp-const.ll | 12 +- .../shuffle_select-inseltpoison.ll | 10 +- .../Transforms/InstCombine/shuffle_select.ll | 10 +- .../signbit-lshr-and-icmpeq-zero.ll | 2 +- .../signbit-shl-and-icmpeq-zero.ll | 2 +- ...mul-lack-of-overflow-check-via-mul-sdiv.ll | 2 +- .../InstCombine/signed-truncation-check.ll | 18 +- llvm/test/Transforms/InstCombine/signext.ll | 14 +- .../signmask-of-sext-vs-of-shl-of-zext.ll | 6 +- .../InstCombine/simplify-demanded-fpclass.ll | 5 +- llvm/test/Transforms/InstCombine/sitofp.ll | 8 +- llvm/test/Transforms/InstCombine/smin-icmp.ll | 118 +- llvm/test/Transforms/InstCombine/sqrt.ll | 2 +- .../InstCombine/ssub-with-overflow.ll | 6 +- .../InstCombine/sub-ashr-or-to-icmp-select.ll | 12 +- .../InstCombine/sub-lshr-or-to-icmp-select.ll | 2 +- llvm/test/Transforms/InstCombine/sub-not.ll | 8 +- .../sub-of-negatible-inseltpoison.ll | 6 +- .../InstCombine/sub-of-negatible.ll | 10 +- llvm/test/Transforms/InstCombine/sub-xor.ll | 16 +- llvm/test/Transforms/InstCombine/sub.ll | 62 +- .../Transforms/InstCombine/trunc-binop-ext.ll | 2 +- .../Transforms/InstCombine/trunc-demand.ll | 10 +- .../InstCombine/trunc-inseltpoison.ll | 26 +- .../InstCombine/trunc-shift-trunc.ll | 4 +- llvm/test/Transforms/InstCombine/trunc.ll | 26 +- .../InstCombine/truncating-saturate.ll | 14 +- .../InstCombine/uadd-with-overflow.ll | 16 +- llvm/test/Transforms/InstCombine/uaddo.ll | 2 +- .../udiv_select_to_select_shift.ll | 2 +- .../Transforms/InstCombine/umin_cttz_ctlz.ll | 4 +- ...old-masked-merge-with-const-mask-vector.ll | 54 +- ...gned-add-lack-of-overflow-check-via-xor.ll | 2 +- .../unsigned-add-lack-of-overflow-check.ll | 2 +- .../unsigned-add-overflow-check-via-xor.ll | 2 +- .../unsigned-add-overflow-check.ll | 2 +- ...mul-lack-of-overflow-check-via-mul-udiv.ll | 2 +- ...k-of-overflow-check-via-udiv-of-allones.ll | 4 +- .../InstCombine/unsigned_saturated_sub.ll | 2 +- ...signext-of-variable-high-bit-extraction.ll | 2 +- .../vec_demanded_elts-inseltpoison.ll | 2 +- .../InstCombine/vec_demanded_elts.ll | 2 +- .../vec_phi_extract-inseltpoison.ll | 2 +- .../Transforms/InstCombine/vec_phi_extract.ll | 2 +- llvm/test/Transforms/InstCombine/vec_sext.ll | 2 +- .../InstCombine/vec_shuffle-inseltpoison.ll | 28 +- .../Transforms/InstCombine/vec_shuffle.ll | 30 +- .../InstCombine/vec_udiv_to_shift.ll | 16 +- .../InstCombine/vector-casts-inseltpoison.ll | 20 +- .../Transforms/InstCombine/vector-casts.ll | 20 +- .../test/Transforms/InstCombine/vector-mul.ll | 40 +- .../vector-reduce-min-max-known.ll | 2 +- .../Transforms/InstCombine/vector-trunc.ll | 10 +- .../Transforms/InstCombine/vector-udiv.ll | 8 +- .../Transforms/InstCombine/vector-urem.ll | 10 +- .../test/Transforms/InstCombine/vector-xor.ll | 20 +- .../Transforms/InstCombine/with_overflow.ll | 32 +- .../test/Transforms/InstCombine/xor-and-or.ll | 12 +- llvm/test/Transforms/InstCombine/xor-ashr.ll | 8 +- llvm/test/Transforms/InstCombine/xor-icmps.ll | 2 +- llvm/test/Transforms/InstCombine/xor-of-or.ll | 6 +- llvm/test/Transforms/InstCombine/xor.ll | 56 +- llvm/test/Transforms/InstCombine/xor2.ll | 8 +- .../InstCombine/zext-bool-add-sub.ll | 2 +- .../zext-ctlz-trunc-to-ctlz-add.ll | 2 +- llvm/test/Transforms/InstCombine/zext.ll | 20 +- .../2011-09-05-InsertExtractValue.ll | 6 +- llvm/test/Transforms/InstSimplify/AndOrXor.ll | 34 +- .../InstSimplify/ConstProp/ARM/mve-vctp.ll | 30 +- .../ConstProp/active-lane-mask.ll | 24 +- .../InstSimplify/ConstProp/bitcast.ll | 2 +- .../Transforms/InstSimplify/ConstProp/cast.ll | 2 +- .../ConstProp/saturating-add-sub.ll | 18 +- .../InstSimplify/ConstProp/vectorgep-crash.ll | 2 +- .../vscale-shufflevector-inseltpoison.ll | 2 +- .../ConstProp/vscale-shufflevector.ll | 2 +- .../Transforms/InstSimplify/abs_intrinsic.ll | 8 +- llvm/test/Transforms/InstSimplify/add_vp.ll | 2 +- llvm/test/Transforms/InstSimplify/addsub.ll | 2 +- .../InstSimplify/bitcast-vector-fold.ll | 39 +- .../InstSimplify/bitreverse-fold.ll | 2 +- llvm/test/Transforms/InstSimplify/call.ll | 6 +- .../Transforms/InstSimplify/canonicalize.ll | 2 +- .../InstSimplify/cast-unsigned-icmp-cmp-0.ll | 12 +- .../InstSimplify/cmp-vec-fast-path.ll | 52 +- llvm/test/Transforms/InstSimplify/compare.ll | 40 +- ...constantfold-add-nuw-allones-to-allones.ll | 2 +- .../constantfold-shl-nuw-C-to-C.ll | 2 +- .../Transforms/InstSimplify/ctpop-pow2.ll | 10 +- llvm/test/Transforms/InstSimplify/div.ll | 6 +- llvm/test/Transforms/InstSimplify/exp10.ll | 28 +- .../InstSimplify/fast-math-strictfp.ll | 4 +- .../test/Transforms/InstSimplify/fast-math.ll | 4 +- llvm/test/Transforms/InstSimplify/fdiv.ll | 2 +- .../floating-point-arithmetic-strictfp.ll | 6 +- .../InstSimplify/floating-point-arithmetic.ll | 22 +- .../InstSimplify/floating-point-compare.ll | 26 +- .../Transforms/InstSimplify/fminmax-folds.ll | 14 +- llvm/test/Transforms/InstSimplify/fp-nan.ll | 4 +- .../Transforms/InstSimplify/fptoi-range.ll | 18 +- llvm/test/Transforms/InstSimplify/frexp.ll | 24 +- llvm/test/Transforms/InstSimplify/gep.ll | 14 +- .../InstSimplify/icmp-bool-constant.ll | 16 +- .../Transforms/InstSimplify/icmp-constant.ll | 42 +- .../InstSimplify/icmp-not-bool-constant.ll | 20 +- llvm/test/Transforms/InstSimplify/icmp.ll | 4 +- llvm/test/Transforms/InstSimplify/implies.ll | 2 +- .../Transforms/InstSimplify/insertelement.ll | 2 +- .../InstSimplify/known-never-infinity.ll | 8 +- .../Transforms/InstSimplify/known-non-zero.ll | 8 +- .../InstSimplify/maxmin_intrinsics.ll | 28 +- llvm/test/Transforms/InstSimplify/negate.ll | 2 +- .../InstSimplify/or-icmps-same-ops.ll | 2 +- llvm/test/Transforms/InstSimplify/or.ll | 36 +- llvm/test/Transforms/InstSimplify/pr28725.ll | 2 +- llvm/test/Transforms/InstSimplify/ptrmask.ll | 22 +- llvm/test/Transforms/InstSimplify/rem.ll | 2 +- llvm/test/Transforms/InstSimplify/returned.ll | 2 +- .../InstSimplify/saturating-add-sub.ll | 26 +- llvm/test/Transforms/InstSimplify/sdiv.ll | 6 +- .../InstSimplify/select-inseltpoison.ll | 8 +- .../Transforms/InstSimplify/select-logical.ll | 14 +- llvm/test/Transforms/InstSimplify/select.ll | 8 +- .../Transforms/InstSimplify/select_or_and.ll | 2 +- .../InstSimplify/shift-knownbits.ll | 6 +- llvm/test/Transforms/InstSimplify/shift.ll | 10 +- llvm/test/Transforms/InstSimplify/shr-nop.ll | 2 +- .../shufflevector-inseltpoison.ll | 4 +- .../Transforms/InstSimplify/shufflevector.ll | 6 +- .../Transforms/InstSimplify/strictfp-fadd.ll | 14 +- llvm/test/Transforms/InstSimplify/uscmp.ll | 4 +- llvm/test/Transforms/InstSimplify/vec-cmp.ll | 4 +- .../InstSimplify/vec-icmp-of-cast.ll | 8 +- .../Transforms/InstSimplify/vector_gep.ll | 2 +- llvm/test/Transforms/InstSimplify/xor.ll | 4 +- ...interleave-load-extract-shuffle-changes.ll | 4 +- .../LoopLoadElim/type-mismatch-opaque-ptr.ll | 2 +- .../Transforms/LoopLoadElim/type-mismatch.ll | 2 +- .../LoopUnroll/ARM/mve-upperbound.ll | 2 +- ...unrolling-legalize-vectors-inseltpoison.ll | 16 +- .../PowerPC/p8-unrolling-legalize-vectors.ll | 16 +- .../LoopVectorize/AArch64/blend-costs.ll | 8 +- .../LoopVectorize/AArch64/call-costs.ll | 4 +- .../AArch64/conditional-branches-cost.ll | 36 +- .../AArch64/deterministic-type-shrinkage.ll | 108 +- .../epilog-vectorization-widen-inductions.ll | 36 +- .../first-order-recurrence-fold-tail.ll | 6 +- .../AArch64/force-target-instruction-cost.ll | 36 +- .../AArch64/induction-costs-sve.ll | 12 +- .../LoopVectorize/AArch64/induction-costs.ll | 338 ++-- .../AArch64/loop-vectorization-factors.ll | 200 +-- ...outer_loop_test1_no_explicit_vect_width.ll | 20 +- ...ng-compatible-sve-no-maximize-bandwidth.ll | 4 +- .../LoopVectorize/AArch64/strict-fadd.ll | 34 +- .../LoopVectorize/AArch64/sve-epilog-vect.ll | 6 +- .../LoopVectorize/AArch64/sve-illegal-type.ll | 2 +- .../LoopVectorize/AArch64/sve-tail-folding.ll | 2 +- .../AArch64/synthesize-mask-for-call.ll | 2 +- .../AArch64/tail-fold-uniform-memops.ll | 4 +- .../AArch64/type-shrinkage-insertelt.ll | 2 +- .../AArch64/vector-reverse-mask4.ll | 4 +- .../ARM/mve-gather-scatter-tailpred.ll | 44 +- .../Transforms/LoopVectorize/ARM/mve-qabs.ll | 12 +- .../ARM/mve-reduction-predselect.ll | 12 +- .../LoopVectorize/ARM/mve-reduction-types.ll | 12 +- .../LoopVectorize/ARM/mve-selectandorcost.ll | 2 +- .../LoopVectorize/ARM/pointer_iv.ll | 30 +- .../ARM/tail-fold-multiple-icmps.ll | 4 +- .../ARM/tail-folding-not-allowed.ll | 16 +- .../LoopVectorize/PowerPC/exit-branch-cost.ll | 32 +- .../PowerPC/optimal-epilog-vectorization.ll | 36 +- .../Transforms/LoopVectorize/RISCV/divrem.ll | 32 +- .../RISCV/interleaved-accesses.ll | 92 +- .../LoopVectorize/RISCV/low-trip-count.ll | 6 +- .../Transforms/LoopVectorize/RISCV/pr88802.ll | 10 +- .../LoopVectorize/RISCV/riscv-interleaved.ll | 2 +- .../RISCV/select-cmp-reduction.ll | 20 +- .../truncate-to-minimal-bitwidth-cost.ll | 2 +- .../LoopVectorize/RISCV/uniform-load-store.ll | 24 +- ...vectorize-force-tail-with-evl-reduction.ll | 8 +- .../vf-will-not-generate-any-vector-insts.ll | 4 +- .../SystemZ/force-target-instruction-cost.ll | 2 +- .../LoopVectorize/SystemZ/pr47665.ll | 8 +- .../predicated-first-order-recurrence.ll | 6 +- .../X86/consecutive-ptr-uniforms.ll | 8 +- .../LoopVectorize/X86/conversion-cost.ll | 20 +- .../X86/cost-constant-known-via-scev.ll | 16 +- .../LoopVectorize/X86/cost-model.ll | 26 +- .../X86/divs-with-tail-folding.ll | 2 +- .../X86/drop-poison-generating-flags.ll | 46 +- .../X86/epilog-vectorization-inductions.ll | 132 +- .../X86/fixed-order-recurrence.ll | 12 +- .../LoopVectorize/X86/float-induction-x86.ll | 24 +- .../LoopVectorize/X86/gather_scatter.ll | 66 +- .../LoopVectorize/X86/gep-use-outside-loop.ll | 10 +- .../illegal-parallel-loop-uniform-write.ll | 12 +- .../X86/imprecise-through-phis.ll | 12 +- .../LoopVectorize/X86/induction-costs.ll | 80 +- .../LoopVectorize/X86/interleave-cost.ll | 8 +- ...rleaved-accesses-sink-store-across-load.ll | 30 +- .../LoopVectorize/X86/load-deref-pred.ll | 8 +- .../LoopVectorize/X86/masked-store-cost.ll | 20 +- .../LoopVectorize/X86/masked_load_store.ll | 292 ++-- .../Transforms/LoopVectorize/X86/optsize.ll | 24 +- ...outer_loop_test1_no_explicit_vect_width.ll | 20 +- .../X86/pr109581-unused-blend.ll | 8 +- .../Transforms/LoopVectorize/X86/pr36524.ll | 8 +- .../Transforms/LoopVectorize/X86/pr48340.ll | 4 +- ...6-sunk-instruction-used-outside-of-loop.ll | 4 +- .../Transforms/LoopVectorize/X86/pr54634.ll | 24 +- .../X86/pr55096-scalarize-add.ll | 6 +- .../Transforms/LoopVectorize/X86/pr72969.ll | 4 +- .../Transforms/LoopVectorize/X86/pr81872.ll | 21 +- .../LoopVectorize/X86/predicate-switch.ll | 244 +-- .../LoopVectorize/X86/reduction-fastmath.ll | 16 +- .../X86/replicate-uniform-call.ll | 4 +- .../LoopVectorize/X86/scatter_crash.ll | 250 ++- .../LoopVectorize/X86/small-size.ll | 6 +- .../LoopVectorize/X86/strided_load_cost.ll | 16 +- .../LoopVectorize/X86/tail_loop_folding.ll | 4 +- .../LoopVectorize/X86/uniform_mem_op.ll | 28 +- .../X86/vect.omp.force.small-tc.ll | 2 +- .../X86/vectorize-interleaved-accesses-gap.ll | 4 +- .../X86/vplan-native-inner-loop-only.ll | 2 +- ...ned-value-used-as-scalar-and-first-lane.ll | 32 +- .../x86-interleaved-accesses-masked-group.ll | 66 +- ...86-interleaved-store-accesses-with-gaps.ll | 22 +- .../LoopVectorize/X86/x86-pr39099.ll | 2 +- .../LoopVectorize/X86/x86-predication.ll | 8 +- llvm/test/Transforms/LoopVectorize/assume.ll | 4 +- .../LoopVectorize/blend-in-header.ll | 6 +- .../Transforms/LoopVectorize/bsd_regex.ll | 10 +- .../LoopVectorize/cast-induction.ll | 4 +- .../LoopVectorize/create-induction-resume.ll | 2 +- .../LoopVectorize/dbg-outer-loop-vect.ll | 40 +- .../LoopVectorize/dead_instructions.ll | 14 +- .../test/Transforms/LoopVectorize/debugloc.ll | 10 +- .../dont-fold-tail-for-const-TC.ll | 6 +- .../dont-fold-tail-for-divisible-TC.ll | 8 +- .../epilog-vectorization-any-of-reductions.ll | 12 +- .../epilog-vectorization-reductions.ll | 6 +- ...log-vectorization-trunc-induction-steps.ll | 4 +- .../LoopVectorize/extract-last-veclane.ll | 4 +- .../first-order-recurrence-chains.ll | 16 +- .../first-order-recurrence-complex.ll | 38 +- ...t-order-recurrence-multiply-recurrences.ll | 10 +- .../LoopVectorize/first-order-recurrence.ll | 82 +- .../LoopVectorize/float-induction.ll | 32 +- .../Transforms/LoopVectorize/icmp-uniforms.ll | 2 +- .../LoopVectorize/if-conversion-nest.ll | 24 +- .../LoopVectorize/if-pred-non-void.ll | 136 +- .../LoopVectorize/if-pred-not-when-safe.ll | 8 +- .../LoopVectorize/if-pred-stores.ll | 6 +- .../Transforms/LoopVectorize/if-reduction.ll | 26 +- .../LoopVectorize/induction-step.ll | 2 +- .../Transforms/LoopVectorize/induction.ll | 324 ++-- .../LoopVectorize/induction_plus.ll | 2 +- .../instruction-only-used-outside-of-loop.ll | 14 +- .../LoopVectorize/interleaved-accesses-3.ll | 2 +- .../LoopVectorize/interleaved-accesses.ll | 34 +- .../invariant-store-vectorization.ll | 6 +- .../LoopVectorize/iv_outside_user.ll | 20 +- .../load-of-struct-deref-pred.ll | 20 +- .../Transforms/LoopVectorize/loop-form.ll | 22 +- .../Transforms/LoopVectorize/loop-scalars.ll | 2 +- .../LoopVectorize/memdep-fold-tail.ll | 6 +- .../LoopVectorize/multiple-address-spaces.ll | 2 +- ...o-fold-tail-by-masking-iv-external-uses.ll | 2 +- .../LoopVectorize/no_outside_user.ll | 102 +- .../optimal-epilog-vectorization.ll | 20 +- llvm/test/Transforms/LoopVectorize/optsize.ll | 4 +- .../outer-loop-vec-phi-predecessor-order.ll | 14 +- .../outer_loop_hcfg_construction.ll | 16 +- .../LoopVectorize/outer_loop_test1.ll | 10 +- .../LoopVectorize/outer_loop_test2.ll | 12 +- .../test/Transforms/LoopVectorize/phi-cost.ll | 10 +- .../LoopVectorize/pointer-induction.ll | 4 +- llvm/test/Transforms/LoopVectorize/pr35773.ll | 4 +- llvm/test/Transforms/LoopVectorize/pr37248.ll | 2 +- .../pr39417-optsize-scevchecks.ll | 2 +- .../LoopVectorize/pr44488-predication.ll | 6 +- llvm/test/Transforms/LoopVectorize/pr45259.ll | 6 +- llvm/test/Transforms/LoopVectorize/pr45525.ll | 2 +- .../pr45679-fold-tail-by-masking.ll | 24 +- .../pr51614-fold-tail-by-masking.ll | 4 +- .../pr55167-fold-tail-live-out.ll | 18 +- llvm/test/Transforms/LoopVectorize/pr66616.ll | 2 +- .../LoopVectorize/predicate-switch.ll | 40 +- .../LoopVectorize/preserve-or-disjoint.ll | 4 +- .../LoopVectorize/reduction-inloop-cond.ll | 16 +- .../LoopVectorize/reduction-inloop-min-max.ll | 4 +- .../LoopVectorize/reduction-inloop-pred.ll | 100 +- .../LoopVectorize/reduction-inloop-uf4.ll | 34 +- .../LoopVectorize/reduction-inloop.ll | 36 +- .../LoopVectorize/reduction-order.ll | 4 +- .../LoopVectorize/reduction-predselect.ll | 50 +- .../LoopVectorize/reduction-small-size.ll | 12 +- .../reduction-with-invariant-store.ll | 6 +- .../Transforms/LoopVectorize/reduction.ll | 24 +- .../LoopVectorize/reverse_induction.ll | 16 +- .../runtime-check-small-clamped-bounds.ll | 8 +- .../Transforms/LoopVectorize/runtime-check.ll | 2 +- .../LoopVectorize/runtime-checks-hoist.ll | 2 +- .../LoopVectorize/scalarize-masked-call.ll | 4 +- .../LoopVectorize/scev-predicate-reasoning.ll | 2 +- .../LoopVectorize/select-cmp-multiuse.ll | 24 +- .../LoopVectorize/select-cmp-predicated.ll | 4 +- .../Transforms/LoopVectorize/select-cmp.ll | 38 +- ...tion-start-value-may-be-undef-or-poison.ll | 54 +- .../LoopVectorize/select-reduction.ll | 6 +- .../LoopVectorize/single-value-blend-phis.ll | 24 +- .../tail-folding-alloca-in-loop.ll | 10 +- .../tail-folding-counting-down.ll | 4 +- .../LoopVectorize/tail-folding-switch.ll | 6 +- .../trip-count-expansion-may-introduce-ub.ll | 30 +- .../LoopVectorize/trunc-extended-icmps.ll | 2 +- .../Transforms/LoopVectorize/trunc-shifts.ll | 80 +- .../Transforms/LoopVectorize/uniform-blend.ll | 2 +- .../uniform_across_vf_induction1.ll | 90 +- .../uniform_across_vf_induction1_and.ll | 54 +- .../uniform_across_vf_induction1_div_urem.ll | 26 +- .../uniform_across_vf_induction1_lshr.ll | 88 +- .../uniform_across_vf_induction2.ll | 352 ++--- .../unused-blend-mask-for-first-operand.ll | 2 +- .../Transforms/LoopVectorize/vector-geps.ll | 2 +- .../vector-intrinsic-call-cost.ll | 2 +- .../version-stride-with-integer-casts.ll | 6 +- .../vplan-vectorize-inner-loop-reduction.ll | 12 +- .../vplan-widen-call-instruction.ll | 12 +- .../vplan-widen-select-instruction.ll | 8 +- .../dot-product-int-row-major.ll | 16 +- .../multiply-add-sub-double-row-major.ll | 6 +- llvm/test/Transforms/MemCpyOpt/form-memset.ll | 2 +- llvm/test/Transforms/NewGVN/completeness.ll | 4 +- ...ting-sinking-required-for-vectorization.ll | 12 +- .../AArch64/matrix-extract-insert.ll | 16 +- .../AArch64/predicated-reduction.ll | 24 +- .../PhaseOrdering/AArch64/quant_4x4.ll | 8 +- .../PhaseOrdering/AArch64/slpordering.ll | 8 +- .../PhaseOrdering/ARM/arm_mult_q15.ll | 4 +- .../X86/hoist-load-of-baseptr.ll | 8 +- .../PhaseOrdering/X86/pixel-splat.ll | 8 +- .../X86/pr48844-br-to-switch-vectorization.ll | 24 +- .../Transforms/PhaseOrdering/X86/pr50555.ll | 24 +- .../Transforms/PhaseOrdering/X86/pr88239.ll | 2 +- .../PhaseOrdering/X86/shuffle-inseltpoison.ll | 6 +- .../Transforms/PhaseOrdering/X86/shuffle.ll | 6 +- .../PhaseOrdering/X86/speculation-vs-tbaa.ll | 4 +- .../PhaseOrdering/X86/vdiv-nounroll.ll | 2 +- .../test/Transforms/PhaseOrdering/X86/vdiv.ll | 8 +- .../Transforms/PhaseOrdering/X86/vec-shift.ll | 24 +- .../X86/vector-reductions-logical.ll | 14 +- .../PhaseOrdering/X86/vector-reductions.ll | 4 +- .../expand-vp-load-store.ll | 4 +- .../PreISelIntrinsicLowering/expand-vp.ll | 44 +- .../Reassociate/fast-ReassociateVector.ll | 44 +- llvm/test/Transforms/Reassociate/negation.ll | 6 +- .../Transforms/Reassociate/xor_reassoc.ll | 18 +- .../vector-nonlive-clobber.ll | 6 +- .../test/Transforms/SCCP/add-nuw-nsw-flags.ll | 8 +- llvm/test/Transforms/SCCP/intrinsics.ll | 2 +- llvm/test/Transforms/SCCP/ip-ranges-casts.ll | 2 +- llvm/test/Transforms/SCCP/overdefined-ext.ll | 8 +- .../Transforms/SCCP/trunc-nuw-nsw-flags.ll | 2 +- llvm/test/Transforms/SCCP/vector-bitcast.ll | 4 +- .../Transforms/SLPVectorizer/AArch64/div.ll | 4 +- .../AArch64/external-use-icmp.ll | 4 +- .../AArch64/extractelements-to-shuffle.ll | 4 +- .../SLPVectorizer/AArch64/gather-cost.ll | 6 +- .../SLPVectorizer/AArch64/gather-root.ll | 12 +- .../SLPVectorizer/AArch64/loadi8.ll | 2 +- .../SLPVectorizer/AArch64/loadorder.ll | 8 +- .../AArch64/memory-runtime-checks.ll | 6 +- .../SLPVectorizer/AArch64/sdiv-pow2.ll | 2 +- .../AArch64/transpose-inseltpoison.ll | 6 +- .../SLPVectorizer/AArch64/transpose.ll | 6 +- .../SLPVectorizer/AArch64/trunc-insertion.ll | 2 +- .../SLPVectorizer/AArch64/vec15-base.ll | 6 +- .../SLPVectorizer/AArch64/vec3-base.ll | 12 +- .../AArch64/vec3-reorder-reshuffle.ll | 8 +- .../AArch64/vectorizable-selects-min-max.ll | 102 +- .../vectorizable-selects-uniform-cmps.ll | 16 +- .../Transforms/SLPVectorizer/AArch64/widen.ll | 4 +- .../Transforms/SLPVectorizer/NVPTX/v2f16.ll | 4 +- .../SLPVectorizer/RISCV/complex-loads.ll | 64 +- .../SLPVectorizer/RISCV/floating-point.ll | 12 +- .../RISCV/gather-node-with-no-users.ll | 4 +- .../RISCV/getpointerschaincost.ll | 6 +- .../SLPVectorizer/RISCV/load-binop-store.ll | 20 +- .../SLPVectorizer/RISCV/load-store.ll | 8 +- .../RISCV/minbw-with-and-and-scalar-trunc.ll | 4 +- .../SLPVectorizer/RISCV/phi-const.ll | 2 +- .../reduced-value-repeated-and-vectorized.ll | 2 +- .../reduction-extension-after-bitwidth.ll | 2 +- .../SLPVectorizer/RISCV/reductions.ll | 2 +- .../RISCV/remarks-insert-into-small-vector.ll | 2 +- .../Transforms/SLPVectorizer/RISCV/revec.ll | 2 +- ...reversed-strided-node-with-external-ptr.ll | 4 +- .../RISCV/scatter-vectorize-reversed.ll | 2 +- .../RISCV/select-profitability.ll | 4 +- .../RISCV/smin-signed-zextended.ll | 4 +- .../RISCV/strided-loads-vectorized.ll | 16 +- .../strided-loads-with-external-indices.ll | 2 +- .../strided-loads-with-external-use-ptr.ll | 2 +- .../SLPVectorizer/RISCV/strided-loads.ll | 2 +- .../RISCV/strided-stores-vectorized.ll | 2 +- .../RISCV/trunc-bv-multi-uses.ll | 2 +- .../RISCV/trunc-to-large-than-bw.ll | 4 +- .../unsigned-node-trunc-with-signed-users.ll | 10 +- .../SLPVectorizer/RISCV/vec15-base.ll | 8 +- .../SLPVectorizer/RISCV/vec3-base.ll | 18 +- .../SystemZ/ext-not-resized-op-resized.ll | 4 +- .../SLPVectorizer/SystemZ/pr34619.ll | 2 +- .../Transforms/SLPVectorizer/X86/PR35628_2.ll | 4 +- .../X86/alternate-int-inseltpoison.ll | 8 +- .../SLPVectorizer/X86/alternate-int.ll | 8 +- .../SLPVectorizer/X86/arith-and-const-load.ll | 12 +- .../Transforms/SLPVectorizer/X86/arith-div.ll | 88 +- .../SLPVectorizer/X86/arith-fshl-rot.ll | 4 +- .../SLPVectorizer/X86/arith-fshr-rot.ll | 4 +- .../SLPVectorizer/X86/barriercall.ll | 2 +- .../Transforms/SLPVectorizer/X86/c-ray.ll | 2 +- .../X86/cast-operand-extracted.ll | 4 +- .../Transforms/SLPVectorizer/X86/cmp_sel.ll | 2 +- .../X86/combined-stores-chains.ll | 2 +- .../SLPVectorizer/X86/compare-reduce.ll | 6 +- .../SLPVectorizer/X86/crash_bullet3.ll | 6 +- .../SLPVectorizer/X86/crash_cmpop.ll | 16 +- .../X86/crash_scheduling-inseltpoison.ll | 2 +- .../SLPVectorizer/X86/crash_scheduling.ll | 2 +- .../SLPVectorizer/X86/crash_sim4b1.ll | 2 +- .../SLPVectorizer/X86/crash_smallpt.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/cse.ll | 8 +- .../SLPVectorizer/X86/debug-counter.ll | 8 +- .../SLPVectorizer/X86/different-vec-widths.ll | 10 +- .../X86/external-used-across-reductions.ll | 4 +- .../SLPVectorizer/X86/external_user.ll | 6 +- .../SLPVectorizer/X86/extractcost.ll | 2 +- .../SLPVectorizer/X86/fabs-cost-softfp.ll | 2 +- .../X86/gather-node-same-as-vect-but-order.ll | 2 +- .../SLPVectorizer/X86/gather-with-cmp-user.ll | 2 +- .../X86/gep-nodes-with-non-gep-inst.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/gep.ll | 2 +- .../SLPVectorizer/X86/geps-non-pow-2.ll | 6 +- .../SLPVectorizer/X86/horizontal-list.ll | 2 +- .../SLPVectorizer/X86/horizontal.ll | 2 +- .../SLPVectorizer/X86/insert-after-bundle.ll | 40 +- .../SLPVectorizer/X86/long_chains.ll | 12 +- .../X86/matched-shuffled-entries.ll | 6 +- .../X86/matching-gather-nodes-phi-users.ll | 4 +- .../X86/minbitwidth-icmp-to-trunc.ll | 20 +- .../SLPVectorizer/X86/minimum-sizes.ll | 8 +- .../Transforms/SLPVectorizer/X86/mul64.ll | 4 +- .../X86/non-power-of-2-order-detection.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/phi.ll | 8 +- .../test/Transforms/SLPVectorizer/X86/phi3.ll | 2 +- .../Transforms/SLPVectorizer/X86/powof2div.ll | 2 +- .../Transforms/SLPVectorizer/X86/powof2mul.ll | 28 +- .../Transforms/SLPVectorizer/X86/pr23510.ll | 10 +- .../Transforms/SLPVectorizer/X86/pr35497.ll | 20 +- .../Transforms/SLPVectorizer/X86/pr40522.ll | 4 +- .../SLPVectorizer/X86/pr44067-inseltpoison.ll | 2 +- .../Transforms/SLPVectorizer/X86/pr44067.ll | 2 +- .../Transforms/SLPVectorizer/X86/pr46983.ll | 16 +- .../SLPVectorizer/X86/pr47629-inseltpoison.ll | 16 +- .../Transforms/SLPVectorizer/X86/pr47629.ll | 16 +- .../SLPVectorizer/X86/pr48879-sroa.ll | 4 +- .../Transforms/SLPVectorizer/X86/pr49933.ll | 2 +- .../SLPVectorizer/X86/propagate_ir_flags.ll | 38 +- .../X86/reduction-bool-logic-op-inside.ll | 2 +- .../SLPVectorizer/X86/reduction-logical.ll | 10 +- .../Transforms/SLPVectorizer/X86/reduction.ll | 2 +- .../SLPVectorizer/X86/reduction2.ll | 8 +- .../X86/redux-feed-buildvector.ll | 4 +- .../X86/redux-feed-insertelement.ll | 2 +- .../X86/remark_gather-load-redux-cost.ll | 4 +- .../X86/reorder-reused-masked-gather.ll | 2 +- .../Transforms/SLPVectorizer/X86/resched.ll | 2 +- .../X86/reuse-extracts-in-wider-vect.ll | 2 +- .../X86/reused-scalars-in-buildvector.ll | 2 +- .../scatter-vectorize-reorder-non-empty.ll | 2 +- .../SLPVectorizer/X86/schedule-bundle.ll | 8 +- .../SLPVectorizer/X86/shrink_after_reorder.ll | 2 +- .../SLPVectorizer/X86/simple-loop.ll | 6 +- .../X86/sitofp-minbitwidth-node.ll | 2 +- .../X86/split-load8_2_unord_geps.ll | 8 +- .../X86/stackrestore-dependence.ll | 2 +- .../SLPVectorizer/X86/stacksave-dependence.ll | 14 +- .../SLPVectorizer/X86/stores_vectorize.ll | 6 +- .../subvector-minbitwidth-unsigned-value.ll | 2 +- .../Transforms/SLPVectorizer/X86/supernode.ll | 2 +- .../SLPVectorizer/X86/unreachable.ll | 2 +- .../Transforms/SLPVectorizer/X86/vec3-base.ll | 4 +- .../X86/vec_list_bias-inseltpoison.ll | 2 +- .../SLPVectorizer/X86/vec_list_bias.ll | 2 +- .../vec_list_bias_external_insert_shuffled.ll | 2 +- .../X86/vect_copyable_in_binops.ll | 8 +- .../SLPVectorizer/X86/vectorize-pair-path.ll | 2 +- .../abs-overflow-incorrect-minbws.ll | 2 +- .../SLPVectorizer/alternate-non-profitable.ll | 2 +- .../call-arg-reduced-by-minbitwidth.ll | 4 +- .../extended-vectorized-gathered-inst.ll | 2 +- .../SLPVectorizer/freeze-signedness-missed.ll | 2 +- ...nsert-element-build-vector-inseltpoison.ll | 2 +- .../insert-element-build-vector.ll | 2 +- .../SLPVectorizer/jumbled_store_crash.ll | 2 +- .../SLPVectorizer/operand-is-reduced-val.ll | 4 +- .../SLPVectorizer/phi-node-bitwidt-op-not.ll | 6 +- .../reduction-whole-regs-loads.ll | 4 +- .../SLPVectorizer/reduction_loads.ll | 2 +- .../reudction-or-non-poisoned.ll | 2 +- ...ed-buildvector-matching-vectorized-node.ll | 4 +- .../SLPVectorizer/revec-fix-109835.ll | 8 +- .../SLPVectorizer/shrink_after_reorder2.ll | 4 +- llvm/test/Transforms/SROA/tbaa-struct3.ll | 2 +- .../AArch64/expand-masked-load.ll | 2 +- .../AArch64/expand-masked-store.ll | 2 +- .../Scalarizer/phi-unreachable-pred.ll | 2 +- .../nontrivial-unswitch-select.ll | 2 +- .../X86/hoist-loads-stores-with-cf.ll | 16 +- .../SimplifyCFG/preserve-store-alignment.ll | 30 +- .../StraightLineStrengthReduce/slsr-add.ll | 6 +- .../VectorCombine/AArch64/select-shuffle.ll | 16 +- .../VectorCombine/AArch64/shrink-types.ll | 10 +- .../AArch64/shuffletoidentity.ll | 4 +- .../AArch64/vecreduce-shuffle.ll | 42 +- ...ntrin-scalarization-shufflevector-splat.ll | 2 +- .../RISCV/vpintrin-scalarization.ll | 10 +- .../X86/insert-binop-with-constant.ll | 18 +- .../VectorCombine/X86/scalarize-cmp.ll | 10 +- .../VectorCombine/X86/shuffle-inseltpoison.ll | 8 +- .../VectorCombine/X86/shuffle-of-binops.ll | 2 +- .../Transforms/VectorCombine/X86/shuffle.ll | 8 +- llvm/test/tools/llvm-reduce/reduce-opcodes.ll | 2 +- .../tools/llvm-reduce/reduce-operands-fp.ll | 28 +- .../tools/llvm-reduce/reduce-operands-int.ll | 12 +- mlir/test/Target/LLVMIR/llvmir.mlir | 20 +- 1095 files changed, 17398 insertions(+), 17108 deletions(-) diff --git a/clang/test/CodeGen/PowerPC/altivec.c b/clang/test/CodeGen/PowerPC/altivec.c index 808135de00ea6b..6eb955dcdec204 100644 --- a/clang/test/CodeGen/PowerPC/altivec.c +++ b/clang/test/CodeGen/PowerPC/altivec.c @@ -12,8 +12,8 @@ // RUN: %clang -S -emit-llvm -maltivec -mabi=vec-default -mcpu=pwr8 --target=powerpc64-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE // Check initialization -vector int test0 = (vector int)(1); // CHECK: @test0 ={{.*}} global <4 x i32> -vector float test1 = (vector float)(1.0); // CHECK: @test1 ={{.*}} global <4 x float> +vector int test0 = (vector int)(1); // CHECK: @test0 ={{.*}} global <4 x i32> splat (i32 1) +vector float test1 = (vector float)(1.0); // CHECK: @test1 ={{.*}} global <4 x float> splat (float 1.000000e+{{0+}}) // CHECK-BE: @v1 ={{.*}} global <16 x i8> // CHECK-LE: @v1 ={{.*}} global <16 x i8> @@ -32,8 +32,8 @@ void test2(void) { vector int vi; vector float vf; - vi = (vector int)(1); // CHECK: - vf = (vector float)(1.0); // CHECK: + vi = (vector int)(1); // CHECK: splat (i32 1) + vf = (vector float)(1.0); // CHECK: splat (float 1.000000e+{{0+}}) vi = (vector int)(1, 2, 3, 4); // CHECK: vi = (vector int)(1, 2, 3, 4, 5); // CHECK: @@ -46,9 +46,9 @@ void test2(void) // Check pre/post increment/decrement void test3(void) { vector int vi; - vi++; // CHECK: add <4 x i32> {{.*}} + vi++; // CHECK: add <4 x i32> {{.*}} splat (i32 1) vector unsigned int vui; - --vui; // CHECK: add <4 x i32> {{.*}} + --vui; // CHECK: add <4 x i32> {{.*}} splat (i32 -1) vector float vf; - vf++; // CHECK: fadd <4 x float> {{.*}} + vf++; // CHECK: fadd <4 x float> {{.*}} splat (float 1.000000e+{{0+}}) } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c b/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c index 90c28ddd316ee3..91d1ebd045c58e 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c @@ -92,10 +92,10 @@ void test1() { // CHECK-LE: @llvm.ppc.altivec.vmaxsw vf = vec_abs(vf); -// CHECK: and <4 x i32> {{.*}}, +// CHECK: and <4 x i32> {{.*}}, splat (i32 2147483647) // CHECK: store <4 x float> %{{.*}}, ptr @vf // CHECK-LE: bitcast <4 x float> %{{.*}} to <4 x i32> -// CHECK-LE: and <4 x i32> {{.*}}, +// CHECK-LE: and <4 x i32> {{.*}}, splat (i32 2147483647) // CHECK-LE: bitcast <4 x i32> %{{.*}} to <4 x float> // CHECK-LE: store <4 x float> %{{.*}}, ptr @vf @@ -3502,39 +3502,39 @@ void test6() { /* vec_sl */ res_vsc = vec_sl(vsc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_sl(vuc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_sl(vs, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_sl(vus, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_sl(vi, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_sl(vui, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vsc = vec_vslb(vsc, vuc); @@ -4351,75 +4351,75 @@ void test6() { /* vec_sr */ res_vsc = vec_sr(vsc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_sr(vuc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_sr(vs, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_sr(vus, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_sr(vi, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_sr(vui, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vsc = vec_vsrb(vsc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vuc = vec_vsrb(vuc, vuc); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8) // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vs = vec_vsrh(vs, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vus = vec_vsrh(vus, vus); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16) // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vi = vec_vsrw(vi, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vui = vec_vsrw(vui, vui); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32) // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]] /* vec_sra */ @@ -5623,21 +5623,21 @@ void test6() { res_vi = vec_sube(vi, vi, vi); // CHECK: and <4 x i32> -// CHECK: xor <4 x i32> {{%[0-9]+}}, +// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK: add <4 x i32> // CHECK: add <4 x i32> // CHECK-LE: and <4 x i32> -// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, +// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK-LE: add <4 x i32> // CHECK-LE: add <4 x i32> res_vui = vec_sube(vui, vui, vui); // CHECK: and <4 x i32> -// CHECK: xor <4 x i32> {{%[0-9]+}}, +// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK: add <4 x i32> // CHECK: add <4 x i32> // CHECK-LE: and <4 x i32> -// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, +// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK-LE: add <4 x i32> // CHECK-LE: add <4 x i32> @@ -9407,7 +9407,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9415,7 +9415,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9423,7 +9423,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9431,7 +9431,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9439,7 +9439,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9447,7 +9447,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -9455,7 +9455,7 @@ void test8() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) } @@ -9529,10 +9529,10 @@ void test10() { vector float test_rsqrtf(vector float a, vector float b) { // CHECK-LABEL: test_rsqrtf // CHECK: call fast <4 x float> @llvm.sqrt.v4f32 - // CHECK: fdiv fast <4 x float> + // CHECK: fdiv fast <4 x float> splat (float 1.000000e+00) // CHECK-LE-LABEL: test_rsqrtf // CHECK-LE: call fast <4 x float> @llvm.sqrt.v4f32 - // CHECK-LE: fdiv fast <4 x float> + // CHECK-LE: fdiv fast <4 x float> splat (float 1.000000e+00) return vec_rsqrt(a); } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c index 5c1246c7a1e816..1089115a4f9627 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c @@ -42,7 +42,7 @@ vector double test_flags_recipdivd() { // CHECK-LABEL: @test_flags_rsqrtf( // CHECK: [[TMP0:%.*]] = load <4 x float>, ptr @a, align 16 // CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) -// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <4 x float> , [[TMP1]] +// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <4 x float> splat (float 1.000000e+00), [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr @b, align 16 // CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[RSQRT]], [[TMP2]] // CHECK-NEXT: ret <4 x float> [[ADD]] @@ -54,7 +54,7 @@ vector float test_flags_rsqrtf() { // CHECK-LABEL: @test_flags_rsqrtd( // CHECK: [[TMP0:%.*]] = load <2 x double>, ptr @d, align 16 // CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) -// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x double> , [[TMP1]] +// CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x double> splat (double 1.000000e+00), [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @e, align 16 // CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[RSQRT]], [[TMP2]] // CHECK-NEXT: ret <2 x double> [[ADD]] diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c index 151a29976d8ed5..abf6ed3d3b0d7a 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c @@ -1535,17 +1535,17 @@ vector unsigned long long test_vec_extracth_ul(void) { } vector signed int test_vec_vec_splati_si(void) { - // CHECK: ret <4 x i32> + // CHECK: ret <4 x i32> splat (i32 -17) return vec_splati(-17); } vector unsigned int test_vec_vec_splati_ui(void) { - // CHECK: ret <4 x i32> + // CHECK: ret <4 x i32> splat (i32 16) return vec_splati(16U); } vector float test_vec_vec_splati_f(void) { - // CHECK: ret <4 x float> + // CHECK: ret <4 x float> splat (float 1.000000e+00) return vec_splati(1.0f); } @@ -1863,7 +1863,7 @@ vector bool __int128 test_vec_cmpeq_bool_int128(void) { vector bool __int128 test_vec_cmpne_s128(void) { // CHECK-LABEL: @test_vec_cmpne_s128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128> - // CHECK-NEXT: %not.i = xor <1 x i128> %4, + // CHECK-NEXT: %not.i = xor <1 x i128> %4, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> %not.i return vec_cmpne(vsi128a, vsi128b); } @@ -1871,7 +1871,7 @@ vector bool __int128 test_vec_cmpne_s128(void) { vector bool __int128 test_vec_cmpne_u128(void) { // CHECK-LABEL: @test_vec_cmpne_u128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %4, + // CHECK-NEXT: xor <1 x i128> %4, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmpne(vui128a, vui128b); } @@ -1879,7 +1879,7 @@ vector bool __int128 test_vec_cmpne_u128(void) { vector bool __int128 test_vec_cmpne_bool_int128(void) { // CHECK-LABEL: @test_vec_cmpne_bool_int128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %4, + // CHECK-NEXT: xor <1 x i128> %4, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmpne(vbi128a, vbi128b); } @@ -1915,7 +1915,7 @@ vector bool __int128 test_vec_cmplt_u128(void) { vector bool __int128 test_vec_cmpge_s128(void) { // CHECK-LABEL: @test_vec_cmpge_s128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtsq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %6, + // CHECK-NEXT: xor <1 x i128> %6, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmpge(vsi128a, vsi128b); } @@ -1923,7 +1923,7 @@ vector bool __int128 test_vec_cmpge_s128(void) { vector bool __int128 test_vec_cmpge_u128(void) { // CHECK-LABEL: @test_vec_cmpge_u128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtuq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %6, + // CHECK-NEXT: xor <1 x i128> %6, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmpge(vui128a, vui128b); } @@ -1931,7 +1931,7 @@ vector bool __int128 test_vec_cmpge_u128(void) { vector bool __int128 test_vec_cmple_s128(void) { // CHECK-LABEL: @test_vec_cmple_s128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtsq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %8, + // CHECK-NEXT: xor <1 x i128> %8, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmple(vsi128a, vsi128b); } @@ -1939,7 +1939,7 @@ vector bool __int128 test_vec_cmple_s128(void) { vector bool __int128 test_vec_cmple_u128(void) { // CHECK-LABEL: @test_vec_cmple_u128( // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtuq(<1 x i128> - // CHECK-NEXT: xor <1 x i128> %8, + // CHECK-NEXT: xor <1 x i128> %8, splat (i128 -1) // CHECK-NEXT: ret <1 x i128> return vec_cmple(vui128a, vui128b); } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c index 69ce9d6214e3cc..435465ebbb4be4 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c @@ -675,15 +675,15 @@ void test1() { /* vec_sr */ res_vsll = vec_sr(vsll, vull); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64) // CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64) // CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] res_vull = vec_sr(vull, vull); -// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64) // CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] -// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, +// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64) // CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]] /* vec_sra */ @@ -831,299 +831,299 @@ void test1() { /* vec_nand */ res_vsc = vec_nand(vsc, vsc); // CHECK: [[T1:%.+]] = and <16 x i8> -// CHECK: xor <16 x i8> [[T1]], +// CHECK: xor <16 x i8> [[T1]], splat (i8 -1) // CHECK-LE: [[T1:%.+]] = and <16 x i8> -// CHECK-LE: xor <16 x i8> [[T1]], +// CHECK-LE: xor <16 x i8> [[T1]], splat (i8 -1) // CHECK-PPC: error: call to undeclared function 'vec_nand' res_vbc = vec_nand(vbc, vbc); // CHECK: [[T1:%.+]] = and <16 x i8> -// CHECK: xor <16 x i8> [[T1]], +// CHECK: xor <16 x i8> [[T1]], splat (i8 -1) // CHECK-LE: [[T1:%.+]] = and <16 x i8> -// CHECK-LE: xor <16 x i8> [[T1]], +// CHECK-LE: xor <16 x i8> [[T1]], splat (i8 -1) res_vuc = vec_nand(vuc, vuc); // CHECK: [[T1:%.+]] = and <16 x i8> -// CHECK: xor <16 x i8> [[T1]], +// CHECK: xor <16 x i8> [[T1]], splat (i8 -1) // CHECK-LE: [[T1:%.+]] = and <16 x i8> -// CHECK-LE: xor <16 x i8> [[T1]], +// CHECK-LE: xor <16 x i8> [[T1]], splat (i8 -1) res_vss = vec_nand(vss, vss); // CHECK: [[T1:%.+]] = and <8 x i16> -// CHECK: xor <8 x i16> [[T1]], +// CHECK: xor <8 x i16> [[T1]], splat (i16 -1) // CHECK-LE: [[T1:%.+]] = and <8 x i16> -// CHECK-LE: xor <8 x i16> [[T1]], +// CHECK-LE: xor <8 x i16> [[T1]], splat (i16 -1) res_vbs = vec_nand(vbs, vbs); // CHECK: [[T1:%.+]] = and <8 x i16> -// CHECK: xor <8 x i16> [[T1]], +// CHECK: xor <8 x i16> [[T1]], splat (i16 -1) // CHECK-LE: [[T1:%.+]] = and <8 x i16> -// CHECK-LE: xor <8 x i16> [[T1]], +// CHECK-LE: xor <8 x i16> [[T1]], splat (i16 -1) res_vus = vec_nand(vus, vus); // CHECK: [[T1:%.+]] = and <8 x i16> -// CHECK: xor <8 x i16> [[T1]], +// CHECK: xor <8 x i16> [[T1]], splat (i16 -1) // CHECK-LE: [[T1:%.+]] = and <8 x i16> -// CHECK-LE: xor <8 x i16> [[T1]], +// CHECK-LE: xor <8 x i16> [[T1]], splat (i16 -1) res_vsi = vec_nand(vsi, vsi); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) res_vbi = vec_nand(vbi, vbi); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) res_vui = vec_nand(vui, vui); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) res_vf = vec_nand(vfa, vfa); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) res_vsll = vec_nand(vsll, vsll); // CHECK: [[T1:%.+]] = and <2 x i64> -// CHECK: xor <2 x i64> [[T1]], +// CHECK: xor <2 x i64> [[T1]], splat (i64 -1) // CHECK-LE: [[T1:%.+]] = and <2 x i64> -// CHECK-LE: xor <2 x i64> [[T1]], +// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1) res_vbll = vec_nand(vbll, vbll); // CHECK: [[T1:%.+]] = and <2 x i64> -// CHECK: xor <2 x i64> [[T1]], +// CHECK: xor <2 x i64> [[T1]], splat (i64 -1) // CHECK-LE: [[T1:%.+]] = and <2 x i64> -// CHECK-LE: xor <2 x i64> [[T1]], +// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1) res_vull = vec_nand(vull, vull); // CHECK: [[T1:%.+]] = and <2 x i64> -// CHECK: xor <2 x i64> [[T1]], +// CHECK: xor <2 x i64> [[T1]], splat (i64 -1) // CHECK-LE: [[T1:%.+]] = and <2 x i64> -// CHECK-LE: xor <2 x i64> [[T1]], +// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1) res_vd = vec_nand(vda, vda); // CHECK: [[T1:%.+]] = and <2 x i64> -// CHECK: xor <2 x i64> [[T1]], +// CHECK: xor <2 x i64> [[T1]], splat (i64 -1) // CHECK-LE: [[T1:%.+]] = and <2 x i64> -// CHECK-LE: xor <2 x i64> [[T1]], +// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1) res_vf = vec_nand(vfa, vfa); // CHECK: [[T1:%.+]] = and <4 x i32> -// CHECK: xor <4 x i32> [[T1]], +// CHECK: xor <4 x i32> [[T1]], splat (i32 -1) // CHECK-LE: [[T1:%.+]] = and <4 x i32> -// CHECK-LE: xor <4 x i32> [[T1]], +// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1) /* vec_orc */ res_vsc = vec_orc(vsc, vsc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] // CHECK-PPC: error: call to undeclared function 'vec_orc' res_vsc = vec_orc(vsc, vbc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vsc = vec_orc(vbc, vsc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vuc = vec_orc(vuc, vuc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vuc = vec_orc(vuc, vbc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vuc = vec_orc(vbc, vuc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vbc = vec_orc(vbc, vbc); -// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK: or <16 x i8> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1) // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]] res_vss = vec_orc(vss, vss); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vss = vec_orc(vss, vbs); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vss = vec_orc(vbs, vss); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vus = vec_orc(vus, vus); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vus = vec_orc(vus, vbs); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vus = vec_orc(vbs, vus); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vbs = vec_orc(vbs, vbs); -// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK: or <8 x i16> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1) // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]] res_vsi = vec_orc(vsi, vsi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vsi = vec_orc(vsi, vbi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vsi = vec_orc(vbi, vsi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vui = vec_orc(vui, vui); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vui = vec_orc(vui, vbi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vui = vec_orc(vbi, vui); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vbi = vec_orc(vbi, vbi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vf = vec_orc(vbi, vfa); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vf = vec_orc(vfa, vbi); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vf = vec_orc(vfa, vfb); -// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK: or <4 x i32> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1) // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]] res_vsll = vec_orc(vsll, vsll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vsll = vec_orc(vsll, vbll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vsll = vec_orc(vbll, vsll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vull = vec_orc(vull, vull); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vull = vec_orc(vull, vbll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vull = vec_orc(vbll, vull); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vbll = vec_orc(vbll, vbll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vd = vec_orc(vbll, vda); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vd = vec_orc(vda, vbll); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] res_vd = vec_orc(vda, vdb); -// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK: or <2 x i64> {{%.+}}, [[T1]] -// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, +// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1) // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]] /* vec_sub */ @@ -1250,7 +1250,7 @@ vector unsigned int test_vec_addec_unsigned (vector unsigned int a, vector unsig vector signed int test_vec_subec_signed (vector signed int a, vector signed int b, vector signed int c) { return vec_subec(a, b, c); // CHECK-LABEL: @test_vec_subec_signed -// CHECK: xor <4 x i32> {{%[0-9]+}}, +// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK: ret <4 x i32> } @@ -1258,7 +1258,7 @@ vector unsigned int test_vec_subec_unsigned (vector unsigned int a, vector unsig return vec_subec(a, b, c); // CHECK-LABEL: @test_vec_subec_unsigned -// CHECK: xor <4 x i32> {{%[0-9]+}}, +// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1) // CHECK: ret <4 x i32> } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c b/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c index 6030899a88357f..01fa6c6ad17e0c 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c @@ -216,7 +216,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK_PPC: error: call to 'vec_revb' is ambiguous diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c index 1fe56a820512d0..99524fa2f79d09 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c @@ -240,11 +240,11 @@ void test1() { res_vd = vec_andc(vbll, vd); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK: xor <2 x i64> %{{[0-9]*}}, +// CHECK: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> // CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK-LE: and <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> @@ -254,11 +254,11 @@ void test1() { res_vd = vec_andc(vd, vbll); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK: xor <2 x i64> %{{[0-9]*}}, +// CHECK: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> // CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK-LE: and <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> @@ -267,7 +267,7 @@ void test1() { res_vd = vec_andc(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64> -// CHECK: xor <2 x i64> %{{[0-9]*}}, +// CHECK: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1) // CHECK: and <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double> @@ -927,10 +927,10 @@ void test1() { res_vd = vec_nor(vd, vd); // CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} -// CHECK-NEXT: xor <2 x i64> [[OR]], +// CHECK-NEXT: xor <2 x i64> [[OR]], splat (i64 -1) // CHECK-LE: bitcast <2 x double> %{{[0-9]+}} to <2 x i64> // CHECK-LE: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} -// CHECK-LE-NEXT: xor <2 x i64> [[OR]], +// CHECK-LE-NEXT: xor <2 x i64> [[OR]], splat (i64 -1) /* vec_or */ res_vsll = vec_or(vsll, vsll); @@ -1037,12 +1037,12 @@ void test1() { // CHECK-LE: call void @dummy() res_vd = vec_sel(vd, vd, vbll); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> @@ -1053,73 +1053,73 @@ void test1() { // CHECK-LE: call void @dummy() res_vd = vec_sel(vd, vd, vull); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> // CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double> res_vbll = vec_sel(vbll, vbll, vbll); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vbll = vec_sel(vbll, vbll, vull); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vsll = vec_sel(vsll, vsll, vbll); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vsll = vec_sel(vsll, vsll, vull); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vull = vec_sel(vull, vull, vbll); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> res_vull = vec_sel(vull, vull, vull); -// CHECK: xor <2 x i64> %{{[0-9]+}}, +// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK: and <2 x i64> %{{[0-9]+}}, // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK: or <2 x i64> -// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, +// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1) // CHECK-LE: and <2 x i64> %{{[0-9]+}}, // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}} // CHECK-LE: or <2 x i64> @@ -1282,16 +1282,16 @@ void test1() { // CHECK-LE: fptosi <2 x double> %{{.*}} to <2 x i64> res_vsll = vec_ctsl(vf, 3); - // CHECK: fmul <4 x float> {{%.*}}, + // CHECK: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00) // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcvspsxds(<4 x float> - // CHECK-LE: fmul <4 x float> {{%.*}}, + // CHECK-LE: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00) // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcvspsxds(<4 x float> res_vsll = vec_ctsl(vd, 3); - // CHECK: fmul <2 x double> {{%.*}}, + // CHECK: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00) // CHECK: fptosi <2 x double> {{%.*}} to <2 x i64> - // CHECK-LE: fmul <2 x double> {{%.*}}, + // CHECK-LE: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00) // CHECK-LE: fptosi <2 x double> {{%.*}} to <2 x i64> res_vull = vec_ctu(vd, 0); @@ -1307,16 +1307,16 @@ void test1() { // CHECK-LE: fptoui <2 x double> %{{.*}} to <2 x i64> res_vull = vec_ctul(vf, 3); - // CHECK: fmul <4 x float> {{%.*}}, + // CHECK: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00) // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcvspuxds(<4 x float> - // CHECK-LE: fmul <4 x float> {{%.*}}, + // CHECK-LE: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00) // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcvspuxds(<4 x float> res_vull = vec_ctul(vd, 3); - // CHECK: fmul <2 x double> {{%.*}}, + // CHECK: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00) // CHECK: fptoui <2 x double> {{%.*}} to <2 x i64> - // CHECK-LE: fmul <2 x double> {{%.*}}, + // CHECK-LE: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00) // CHECK-LE: fptoui <2 x double> {{%.*}} to <2 x i64> res_vf = vec_ctf(vsll, 0); @@ -1345,29 +1345,29 @@ void test1() { res_vd = vec_ctd(vsll, 2); // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double> -// CHECK: fmul <2 x double> {{.*}} +// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01) // CHECK-LE: sitofp <2 x i64> %{{.*}} to <2 x double> -// CHECK-LE: fmul <2 x double> {{.*}} +// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01) res_vd = vec_ctd(vull, 2); // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double> -// CHECK: fmul <2 x double> {{.*}} +// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01) // CHECK-LE: uitofp <2 x i64> %{{.*}} to <2 x double> -// CHECK-LE: fmul <2 x double> {{.*}} +// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01) res_vd = vec_ctd(vsi, 2); // CHECK: call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> -// CHECK: fmul <2 x double> {{.*}} +// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01) // CHECK-LE: vperm // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> -// CHECK-LE: fmul <2 x double> {{.*}} +// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01) res_vd = vec_ctd(vui, 2); // CHECK: call <2 x double> @llvm.ppc.vsx.xvcvuxwdp(<4 x i32> -// CHECK: fmul <2 x double> {{.*}} +// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01) // CHECK-LE: vperm // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvcvuxwdp(<4 x i32> -// CHECK-LE: fmul <2 x double> {{.*}} +// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01) res_vsll = vec_signed(vd); // CHECK: fptosi <2 x double> @@ -1781,7 +1781,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -1789,7 +1789,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -1797,7 +1797,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -1805,7 +1805,7 @@ void test1() { // CHECK: store <16 x i8> , ptr {{%.+}}, align 16 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) // CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 -// CHECK-LE: store <16 x i8> , ptr {{%.+}}, align 16 +// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16 // CHECK-LE: xor <16 x i8> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}}) @@ -2352,10 +2352,10 @@ vector double test_recipdivd(vector double a, vector double b) { vector double test_rsqrtd(vector double a, vector double b) { // CHECK-LABEL: test_rsqrtd // CHECK: call fast <2 x double> @llvm.sqrt.v2f64 - // CHECK: fdiv fast <2 x double> + // CHECK: fdiv fast <2 x double> splat (double 1.000000e+00), // CHECK-LE-LABEL: test_rsqrtd // CHECK-LE: call fast <2 x double> @llvm.sqrt.v2f64 - // CHECK-LE: fdiv fast <2 x double> + // CHECK-LE: fdiv fast <2 x double> splat (double 1.000000e+00) return vec_rsqrt(a); } @@ -2448,7 +2448,7 @@ void test_p8overloads_backwards_compat() { // CHECK: and <4 x i32> // CHECK: or <4 x i32> // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK: xor <2 x i64> {{%.*}}, + // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw @@ -2456,7 +2456,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: and <4 x i32> // CHECK-LE: or <4 x i32> // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: xor <2 x i64> {{%.*}}, + // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1) res_vbll = vec_cmpge(vull, vull); // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw @@ -2464,14 +2464,14 @@ void test_p8overloads_backwards_compat() { // CHECK: and <4 x i32> // CHECK: or <4 x i32> // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK: xor <2 x i64> {{%.*}}, + // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> // CHECK-LE: and <4 x i32> // CHECK-LE: or <4 x i32> // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: xor <2 x i64> {{%.*}}, + // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1) dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() @@ -2516,7 +2516,7 @@ void test_p8overloads_backwards_compat() { // CHECK: and <4 x i32> // CHECK: or <4 x i32> // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK: xor <2 x i64> {{%.*}}, + // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw @@ -2524,7 +2524,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: and <4 x i32> // CHECK-LE: or <4 x i32> // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: xor <2 x i64> {{%.*}}, + // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1) res_vbll = vec_cmple(vull, vull); // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw @@ -2532,20 +2532,20 @@ void test_p8overloads_backwards_compat() { // CHECK: and <4 x i32> // CHECK: or <4 x i32> // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK: xor <2 x i64> {{%.*}}, + // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> // CHECK-LE: and <4 x i32> // CHECK-LE: or <4 x i32> // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: xor <2 x i64> {{%.*}}, + // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1) dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() res_vsll = vec_sl(vsll, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> @@ -2553,7 +2553,7 @@ void test_p8overloads_backwards_compat() { // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> @@ -2562,7 +2562,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> res_vull = vec_sl(vull, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> @@ -2570,7 +2570,7 @@ void test_p8overloads_backwards_compat() { // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> @@ -2583,7 +2583,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: call void @dummy() res_vsll = vec_sr(vsll, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr @@ -2591,7 +2591,7 @@ void test_p8overloads_backwards_compat() { // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr @@ -2600,7 +2600,7 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> res_vull = vec_sr(vull, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr @@ -2608,7 +2608,7 @@ void test_p8overloads_backwards_compat() { // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr @@ -2621,14 +2621,14 @@ void test_p8overloads_backwards_compat() { // CHECK-LE: call void @dummy() res_vsll = vec_sra(vsll, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: ashr <2 x i64> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: ashr <2 x i64> res_vull = vec_sra(vull, vull); - // CHECK: urem <2 x i64> {{%.*}}, + // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK: ashr <2 x i64> - // CHECK-LE: urem <2 x i64> {{%.*}}, + // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64) // CHECK-LE: ashr <2 x i64> /* ----------------------- predicates --------------------------- */ diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c index 0d50c91e1250c8..702714a4e786d5 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c @@ -27,32 +27,32 @@ void test() { res_vf = vec_ctf(vsll, 4); // CHECK: [[TMP0:%.*]] = load <2 x i64>, ptr @vsll, align 16 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.ppc.vsx.xvcvsxdsp(<2 x i64> [[TMP0]]) -// CHECK-NEXT: fmul <4 x float> [[TMP1]], +// CHECK-NEXT: fmul <4 x float> [[TMP1]], splat (float 6.250000e-02) // NOCOMPAT: [[TMP0:%.*]] = load <2 x i64>, ptr @vsll, align 16 // NOCOMPAT-NEXT: [[CONV:%.*]] = sitofp <2 x i64> [[TMP0]] to <2 x double> -// NOCOMPAT-NEXT: fmul <2 x double> [[CONV]], +// NOCOMPAT-NEXT: fmul <2 x double> [[CONV]], splat (double 6.250000e-02) res_vf = vec_ctf(vull, 4); // CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr @vull, align 16 // CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.ppc.vsx.xvcvuxdsp(<2 x i64> [[TMP2]]) -// CHECK-NEXT: fmul <4 x float> [[TMP3]], +// CHECK-NEXT: fmul <4 x float> [[TMP3]], splat (float 6.250000e-02) // NOCOMPAT: [[TMP2:%.*]] = load <2 x i64>, ptr @vull, align 16 // NOCOMPAT-NEXT: [[CONV1:%.*]] = uitofp <2 x i64> [[TMP2]] to <2 x double> -// NOCOMPAT-NEXT: fmul <2 x double> [[CONV1]], +// NOCOMPAT-NEXT: fmul <2 x double> [[CONV1]], splat (double 6.250000e-02) res_vsll = vec_cts(vd, 4); // CHECK: [[TMP4:%.*]] = load <2 x double>, ptr @vd, align 16 -// CHECK-NEXT: fmul <2 x double> [[TMP4]], +// CHECK-NEXT: fmul <2 x double> [[TMP4]], splat (double 1.600000e+01) // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcvdpsxws(<2 x double> // NOCOMPAT: [[TMP4:%.*]] = load <2 x double>, ptr @vd, align 16 -// NOCOMPAT-NEXT: fmul <2 x double> [[TMP4]], +// NOCOMPAT-NEXT: fmul <2 x double> [[TMP4]], splat (double 1.600000e+01) res_vull = vec_ctu(vd, 4); // CHECK: [[TMP8:%.*]] = load <2 x double>, ptr @vd, align 16 -// CHECK-NEXT: fmul <2 x double> [[TMP8]], +// CHECK-NEXT: fmul <2 x double> [[TMP8]], splat (double 1.600000e+01) // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcvdpuxws(<2 x double> // NOCOMPAT: [[TMP7:%.*]] = load <2 x double>, ptr @vd, align 16 -// NOCOMPAT-NEXT: fmul <2 x double> [[TMP7]], +// NOCOMPAT-NEXT: fmul <2 x double> [[TMP7]], splat (double 1.600000e+01) res_vd = vec_round(vd); // CHECK: call double @llvm.ppc.readflm() diff --git a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c index 4c4d0dfce05eaf..00de972009eb2f 100644 --- a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c @@ -438,7 +438,7 @@ test_converts() { // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtepi32_pd // CHECK: call <2 x i64> @vec_unpackh(int vector[4]) // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z_.]+}} to <2 x double> -// CHECK: fmul <2 x double> %[[CONV]], +// CHECK: fmul <2 x double> %[[CONV]], splat (double 1.000000e+00) // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtepi32_ps // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0) @@ -466,7 +466,7 @@ test_converts() { // CHECK: call <2 x i64> @vec_splats(unsigned long long) // CHECK: call <2 x i64> @vec_unpackl(int vector[4]) // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z._]+}} to <2 x double> -// CHECK: fmul <2 x double> %[[CONV]], +// CHECK: fmul <2 x double> %[[CONV]], splat (double 1.000000e+00) // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtps_epi32 // CHECK: call <4 x float> @vec_rint(float vector[4]) @@ -1084,23 +1084,23 @@ test_sll() { // CHECK-LABEL: @test_sll // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi16 -// CHECK: store <8 x i16> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int) // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int) -// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef ) +// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15)) // CHECK: call <8 x i16> @vec_sl(unsigned short vector[8], unsigned short vector[8]) // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi32 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1) -// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef splat (i32 32)) // CHECK: call <4 x i32> @vec_sl(unsigned int vector[4], unsigned int vector[4]) // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi64 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0) -// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef ) +// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef splat (i64 64)) // CHECK: call <2 x i64> @vec_sl(unsigned long long vector[2], unsigned long long vector[2]) // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2]) @@ -1179,21 +1179,21 @@ test_sra() { // CHECK-LABEL: @test_sra // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi16 -// CHECK: store <8 x i16> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3) -// CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef ) +// CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15)) // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi32 -// CHECK: store <4 x i32> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <4 x i32> splat (i32 31), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1) -// CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 31)) // CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi16 -// CHECK: store <8 x i16> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16 // CHECK: br i1 %[[CMP]] // CHECK: call i1 @llvm.is.constant @@ -1204,7 +1204,7 @@ test_sra() { // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi32 -// CHECK: store <4 x i32> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <4 x i32> splat (i32 31), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32 // CHECK: br i1 %[[CMP]] // CHECK: call i1 @llvm.is.constant @@ -1230,23 +1230,23 @@ test_srl() { // CHECK-LABEL: @test_srl // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi16 -// CHECK: store <8 x i16> , ptr %{{[0-9a-zA-Z_.]+}}, align 16 +// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3) -// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef ) +// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15)) // CHECK: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8]) // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi32 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1) -// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 32)) // CHECK: call <4 x i32> @vec_sr(unsigned int vector[4], unsigned int vector[4]) // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4]) // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi64 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0) -// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef ) +// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef splat (i64 64)) // CHECK: call <2 x i64> @vec_sr(unsigned long long vector[2], unsigned long long vector[2]) // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2]) diff --git a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c index 8681bb3f9b4f8c..66dfdd48db0ffd 100644 --- a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c +++ b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c @@ -218,30 +218,30 @@ test_cmp() { // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ps // CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef , <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef , <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4]) // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ss // CHECK: call <4 x float> @vec_abs(float vector[4]) // CHECK: call <4 x float> @vec_abs(float vector[4]) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef , <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef , <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}) // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ps // CHECK: call <4 x float> @vec_abs(float vector[4]) // CHECK: call <4 x float> @vec_abs(float vector[4]) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040)) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040)) // CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4]) // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ss // CHECK: call <4 x float> @vec_abs(float vector[4]) // CHECK: call <4 x float> @vec_abs(float vector[4]) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) -// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040)) +// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040)) // CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4]) // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef ) diff --git a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c index 1c6dea34f5c399..a929c80d5ebe7a 100644 --- a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c +++ b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c @@ -51,7 +51,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) { vbi8_1 = (vector bool char)'a'; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned char' // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned char' (vector of 16 'unsigned char' values) and integer type 'int' of different size - // XL: + // XL: splat (i8 97) char c = 'c'; vbi8_2 = (vector bool char)c; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned char' @@ -64,7 +64,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) { vbi16_1 = (vector bool short)5; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned short' // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned short' (vector of 8 'unsigned short' values) and integer type 'int' of different size - // XL: + // XL: splat (i16 5) short si16 = 55; vbi16_2 = (vector bool short)si16; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned short' @@ -77,7 +77,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) { vbi32_1 = (vector bool int)9; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned int' // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned int' (vector of 4 'unsigned int' values) and integer type 'int' of different size - // XL: + // XL: splat (i32 9) int si32 = 99; vbi32_2 = (vector bool int)si32; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned int' @@ -90,7 +90,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) { vbi64_1 = (vector bool long long)13; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned long long' // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned long long' (vector of 2 'unsigned long long' values) and integer type 'int' of different size - // XL: + // XL: splat (i64 13) long long si64 = 1313; vbi64_2 = (vector bool long long)si64; // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned long long' @@ -103,5 +103,5 @@ void test_vector_bool_pixel_init_no_parentheses(void) { p1 = (vector pixel)1; // MIXED-ERR: error: invalid conversion between vector type '__vector __pixel ' // GCC-ERR: error: invalid conversion between vector type '__vector __pixel ' (vector of 8 'unsigned short' values) and integer type 'int' of different size - // XL: + // XL: splat (i16 1) } diff --git a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c index 5057a4d593daae..379e8afe1126f8 100644 --- a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c +++ b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c @@ -50,7 +50,7 @@ void test_vector_bool_pixel_init(void) { // vector bool char initialization vbi8_1 = (vector bool char)('a'); // MIXED: - // XL: + // XL: splat (i8 97) // GCC: error: invalid conversion between vector type '__vector __bool unsigned char' (vector of 16 'unsigned char' values) and integer type 'unsigned char' of different size char c = 'c'; vbi8_2 = (vector bool char)(c); @@ -64,7 +64,7 @@ void test_vector_bool_pixel_init(void) { // vector bool short initialization vbi16_1 = (vector bool short)(5); // MIXED: - // XL: + // XL: splat (i16 5) // GCC: error: invalid conversion between vector type '__vector __bool unsigned short' (vector of 8 'unsigned short' values) and integer type 'unsigned short' of different size short si16 = 55; vbi16_2 = (vector bool short)(si16); @@ -78,7 +78,7 @@ void test_vector_bool_pixel_init(void) { // vector bool int initialization vbi32_1 = (vector bool int)(9); // MIXED: - // XL: + // XL: splat (i32 9) // GCC: error: invalid conversion between vector type '__vector __bool unsigned int' (vector of 4 'unsigned int' values) and integer type 'unsigned int' of different size int si32 = 99; vbi32_2 = (vector bool int)(si32); @@ -92,7 +92,7 @@ void test_vector_bool_pixel_init(void) { // vector bool long long initialization vbi64_1 = (vector bool long long)(13); // MIXED: - // XL: + // XL: splat (i64 13) // GCC: error: invalid conversion between vector type '__vector __bool unsigned long long' (vector of 2 'unsigned long long' values) and integer type 'unsigned long long' of different size long long si64 = 1313; vbi64_2 = (vector bool long long)(si64); @@ -106,6 +106,6 @@ void test_vector_bool_pixel_init(void) { // vector pixel initialization p1 = (vector pixel)(1); // MIXED: - // XL: + // XL: splat (i16 1) // GCC: error: invalid conversion between vector type '__vector __pixel ' (vector of 8 'unsigned short' values) and integer type 'unsigned short' of different size } diff --git a/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c b/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c index bb97707a7a9a13..0c81adcfe0b0b0 100644 --- a/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c +++ b/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c @@ -331,7 +331,7 @@ fixed_uint64m1_t xor_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { // CHECK-LABEL: @not_i8( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -342,7 +342,7 @@ fixed_int8m1_t not_i8(fixed_int8m1_t a) { // CHECK-LABEL: @not_i16( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -353,7 +353,7 @@ fixed_int16m1_t not_i16(fixed_int16m1_t a) { // CHECK-LABEL: @not_i32( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], splat (i32 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -364,7 +364,7 @@ fixed_int32m1_t not_i32(fixed_int32m1_t a) { // CHECK-LABEL: @not_i64( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], splat (i64 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -375,7 +375,7 @@ fixed_int64m1_t not_i64(fixed_int64m1_t a) { // CHECK-LABEL: @not_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -386,7 +386,7 @@ fixed_uint8m1_t not_u8(fixed_uint8m1_t a) { // CHECK-LABEL: @not_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -397,7 +397,7 @@ fixed_uint16m1_t not_u16(fixed_uint16m1_t a) { // CHECK-LABEL: @not_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], splat (i32 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -408,7 +408,7 @@ fixed_uint32m1_t not_u32(fixed_uint32m1_t a) { // CHECK-LABEL: @not_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], splat (i64 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c index 54a3365e03e32d..6a1f8f0e923f65 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c @@ -238,19 +238,19 @@ void test_float(void) { // (emulated) vd = vec_ctd(vsl, 1); // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) - // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) + // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 5.000000e-01), metadata !{{.*}}) // (emulated) vd = vec_ctd(vul, 1); // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) - // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) + // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 5.000000e-01), metadata !{{.*}}) // (emulated) vd = vec_ctd(vsl, 31); // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) - // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) + // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 0x3E00000000000000), metadata !{{.*}}) // (emulated) vd = vec_ctd(vul, 31); // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}}) - // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> , metadata !{{.*}}) + // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 0x3E00000000000000), metadata !{{.*}}) // (emulated) vsl = vec_ctsl(vd, 0); @@ -260,19 +260,19 @@ void test_float(void) { // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %{{.*}}, metadata !{{.*}}) // (emulated) vsl = vec_ctsl(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> {{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> {{.*}}, <2 x double> splat (double 2.000000e+00), metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) vul = vec_ctul(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> splat (double 2.000000e+00), metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) vsl = vec_ctsl(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> splat (double 0x41E0000000000000), metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) vul = vec_ctul(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> , metadata !{{.*}}) + // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> splat (double 0x41E0000000000000), metadata !{{.*}}) // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}}) // (emulated) diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c index 33f3dce73baea4..06fc1ee05d67ff 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c @@ -701,32 +701,32 @@ void test_core(void) { vuc = vec_genmask(0x8000); // CHECK: <16 x i8> vuc = vec_genmask(0xffff); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -1) vuc = vec_genmasks_8(0, 7); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -1) vuc = vec_genmasks_8(1, 4); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 120) vuc = vec_genmasks_8(6, 2); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -29) vus = vec_genmasks_16(0, 15); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 -1) vus = vec_genmasks_16(2, 11); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 16368) vus = vec_genmasks_16(9, 2); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 -8065) vui = vec_genmasks_32(0, 31); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 -1) vui = vec_genmasks_32(7, 20); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 33552384) vui = vec_genmasks_32(25, 4); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 -134217601) vul = vec_genmasks_64(0, 63); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 -1) vul = vec_genmasks_64(3, 40); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 2305843009205305344) vul = vec_genmasks_64(30, 11); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 -4503582447501313) vsc = vec_splat(vsc, 0); // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer @@ -808,37 +808,37 @@ void test_core(void) { // CHECK-ASM: vrepg vsc = vec_splat_s8(-128); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -128) vsc = vec_splat_s8(127); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 127) vuc = vec_splat_u8(1); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 1) vuc = vec_splat_u8(254); - // CHECK: <16 x i8> + // CHECK: <16 x i8> splat (i8 -2) vss = vec_splat_s16(-32768); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 -32768) vss = vec_splat_s16(32767); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 32767) vus = vec_splat_u16(1); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 1) vus = vec_splat_u16(65534); - // CHECK: <8 x i16> + // CHECK: <8 x i16> splat (i16 -2) vsi = vec_splat_s32(-32768); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 -32768) vsi = vec_splat_s32(32767); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 32767) vui = vec_splat_u32(-32768); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 -32768) vui = vec_splat_u32(32767); - // CHECK: <4 x i32> + // CHECK: <4 x i32> splat (i32 32767) vsl = vec_splat_s64(-32768); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 -32768) vsl = vec_splat_s64(32767); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 32767) vul = vec_splat_u64(-32768); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 -32768) vul = vec_splat_u64(32767); - // CHECK: <2 x i64> + // CHECK: <2 x i64> splat (i64 32767) vsc = vec_splats(sc); // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer @@ -4471,19 +4471,19 @@ void test_float(void) { // (emulated) vd = vec_ctd(vsl, 1); // CHECK: [[VAL:%[^ ]+]] = sitofp <2 x i64> %{{.*}} to <2 x double> - // CHECK: fmul <2 x double> [[VAL]], + // CHECK: fmul <2 x double> [[VAL]], splat (double 5.000000e-01) // (emulated) vd = vec_ctd(vul, 1); // CHECK: [[VAL:%[^ ]+]] = uitofp <2 x i64> %{{.*}} to <2 x double> - // CHECK: fmul <2 x double> [[VAL]], + // CHECK: fmul <2 x double> [[VAL]], splat (double 5.000000e-01) // (emulated) vd = vec_ctd(vsl, 31); // CHECK: [[VAL:%[^ ]+]] = sitofp <2 x i64> %{{.*}} to <2 x double> - // CHECK: fmul <2 x double> [[VAL]], + // CHECK: fmul <2 x double> [[VAL]], splat (double 0x3E00000000000000) // (emulated) vd = vec_ctd(vul, 31); // CHECK: [[VAL:%[^ ]+]] = uitofp <2 x i64> %{{.*}} to <2 x double> - // CHECK: fmul <2 x double> [[VAL]], + // CHECK: fmul <2 x double> [[VAL]], splat (double 0x3E00000000000000) // (emulated) vsl = vec_ctsl(vd, 0); @@ -4493,19 +4493,19 @@ void test_float(void) { // CHECK: fptoui <2 x double> %{{.*}} to <2 x i64> // (emulated) vsl = vec_ctsl(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, + // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 2.000000e+00) // CHECK: fptosi <2 x double> [[VAL]] to <2 x i64> // (emulated) vul = vec_ctul(vd, 1); - // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, + // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 2.000000e+00) // CHECK: fptoui <2 x double> [[VAL]] to <2 x i64> // (emulated) vsl = vec_ctsl(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, + // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 0x41E0000000000000) // CHECK: fptosi <2 x double> [[VAL]] to <2 x i64> // (emulated) vul = vec_ctul(vd, 31); - // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, + // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 0x41E0000000000000) // CHECK: fptoui <2 x double> [[VAL]] to <2 x i64> // (emulated) diff --git a/clang/test/CodeGen/SystemZ/zvector.c b/clang/test/CodeGen/SystemZ/zvector.c index 2720770624fb67..4b28e394b33b72 100644 --- a/clang/test/CodeGen/SystemZ/zvector.c +++ b/clang/test/CodeGen/SystemZ/zvector.c @@ -124,31 +124,31 @@ void test_neg(void) { // CHECK-LABEL: define{{.*}} void @test_preinc() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[INC:%.*]] = add <16 x i8> [[TMP0]], +// CHECK: [[INC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 1) // CHECK: store volatile <16 x i8> [[INC]], ptr @sc2, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[INC1:%.*]] = add <16 x i8> [[TMP1]], +// CHECK: [[INC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 1) // CHECK: store volatile <16 x i8> [[INC1]], ptr @uc2, align 8 // CHECK: [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[INC2:%.*]] = add <8 x i16> [[TMP2]], +// CHECK: [[INC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 1) // CHECK: store volatile <8 x i16> [[INC2]], ptr @ss2, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[INC3:%.*]] = add <8 x i16> [[TMP3]], +// CHECK: [[INC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 1) // CHECK: store volatile <8 x i16> [[INC3]], ptr @us2, align 8 // CHECK: [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[INC4:%.*]] = add <4 x i32> [[TMP4]], +// CHECK: [[INC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 1) // CHECK: store volatile <4 x i32> [[INC4]], ptr @si2, align 8 // CHECK: [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[INC5:%.*]] = add <4 x i32> [[TMP5]], +// CHECK: [[INC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 1) // CHECK: store volatile <4 x i32> [[INC5]], ptr @ui2, align 8 // CHECK: [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[INC6:%.*]] = add <2 x i64> [[TMP6]], +// CHECK: [[INC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 1) // CHECK: store volatile <2 x i64> [[INC6]], ptr @sl2, align 8 // CHECK: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[INC7:%.*]] = add <2 x i64> [[TMP7]], +// CHECK: [[INC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 1) // CHECK: store volatile <2 x i64> [[INC7]], ptr @ul2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8 -// CHECK: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], +// CHECK: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00) // CHECK: store volatile <2 x double> [[INC8]], ptr @fd2, align 8 // CHECK: ret void void test_preinc(void) { @@ -170,31 +170,31 @@ void test_preinc(void) { // CHECK-LABEL: define{{.*}} void @test_postinc() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[INC:%.*]] = add <16 x i8> [[TMP0]], +// CHECK: [[INC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 1) // CHECK: store volatile <16 x i8> [[INC]], ptr @sc2, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[INC1:%.*]] = add <16 x i8> [[TMP1]], +// CHECK: [[INC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 1) // CHECK: store volatile <16 x i8> [[INC1]], ptr @uc2, align 8 // CHECK: [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[INC2:%.*]] = add <8 x i16> [[TMP2]], +// CHECK: [[INC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 1) // CHECK: store volatile <8 x i16> [[INC2]], ptr @ss2, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[INC3:%.*]] = add <8 x i16> [[TMP3]], +// CHECK: [[INC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 1) // CHECK: store volatile <8 x i16> [[INC3]], ptr @us2, align 8 // CHECK: [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[INC4:%.*]] = add <4 x i32> [[TMP4]], +// CHECK: [[INC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 1) // CHECK: store volatile <4 x i32> [[INC4]], ptr @si2, align 8 // CHECK: [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[INC5:%.*]] = add <4 x i32> [[TMP5]], +// CHECK: [[INC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 1) // CHECK: store volatile <4 x i32> [[INC5]], ptr @ui2, align 8 // CHECK: [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[INC6:%.*]] = add <2 x i64> [[TMP6]], +// CHECK: [[INC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 1) // CHECK: store volatile <2 x i64> [[INC6]], ptr @sl2, align 8 // CHECK: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[INC7:%.*]] = add <2 x i64> [[TMP7]], +// CHECK: [[INC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 1) // CHECK: store volatile <2 x i64> [[INC7]], ptr @ul2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8 -// CHECK: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], +// CHECK: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00) // CHECK: store volatile <2 x double> [[INC8]], ptr @fd2, align 8 // CHECK: ret void void test_postinc(void) { @@ -216,31 +216,31 @@ void test_postinc(void) { // CHECK-LABEL: define{{.*}} void @test_predec() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[DEC:%.*]] = add <16 x i8> [[TMP0]], +// CHECK: [[DEC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[DEC]], ptr @sc2, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[DEC1:%.*]] = add <16 x i8> [[TMP1]], +// CHECK: [[DEC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[DEC1]], ptr @uc2, align 8 // CHECK: [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[DEC2:%.*]] = add <8 x i16> [[TMP2]], +// CHECK: [[DEC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[DEC2]], ptr @ss2, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[DEC3:%.*]] = add <8 x i16> [[TMP3]], +// CHECK: [[DEC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[DEC3]], ptr @us2, align 8 // CHECK: [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[DEC4:%.*]] = add <4 x i32> [[TMP4]], +// CHECK: [[DEC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[DEC4]], ptr @si2, align 8 // CHECK: [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[DEC5:%.*]] = add <4 x i32> [[TMP5]], +// CHECK: [[DEC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[DEC5]], ptr @ui2, align 8 // CHECK: [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[DEC6:%.*]] = add <2 x i64> [[TMP6]], +// CHECK: [[DEC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[DEC6]], ptr @sl2, align 8 // CHECK: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], +// CHECK: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[DEC7]], ptr @ul2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8 -// CHECK: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], +// CHECK: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double -1.000000e+00) // CHECK: store volatile <2 x double> [[DEC8]], ptr @fd2, align 8 // CHECK: ret void void test_predec(void) { @@ -262,31 +262,31 @@ void test_predec(void) { // CHECK-LABEL: define{{.*}} void @test_postdec() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[DEC:%.*]] = add <16 x i8> [[TMP0]], +// CHECK: [[DEC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[DEC]], ptr @sc2, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[DEC1:%.*]] = add <16 x i8> [[TMP1]], +// CHECK: [[DEC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[DEC1]], ptr @uc2, align 8 // CHECK: [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[DEC2:%.*]] = add <8 x i16> [[TMP2]], +// CHECK: [[DEC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[DEC2]], ptr @ss2, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[DEC3:%.*]] = add <8 x i16> [[TMP3]], +// CHECK: [[DEC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[DEC3]], ptr @us2, align 8 // CHECK: [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[DEC4:%.*]] = add <4 x i32> [[TMP4]], +// CHECK: [[DEC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[DEC4]], ptr @si2, align 8 // CHECK: [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[DEC5:%.*]] = add <4 x i32> [[TMP5]], +// CHECK: [[DEC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[DEC5]], ptr @ui2, align 8 // CHECK: [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[DEC6:%.*]] = add <2 x i64> [[TMP6]], +// CHECK: [[DEC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[DEC6]], ptr @sl2, align 8 // CHECK: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], +// CHECK: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[DEC7]], ptr @ul2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8 -// CHECK: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], +// CHECK: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double -1.000000e+00) // CHECK: store volatile <2 x double> [[DEC8]], ptr @fd2, align 8 // CHECK: ret void void test_postdec(void) { @@ -1086,40 +1086,40 @@ void test_rem_assign(void) { // CHECK-LABEL: define{{.*}} void @test_not() #0 { // CHECK: [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 -// CHECK: [[NEG:%.*]] = xor <16 x i8> [[TMP0]], +// CHECK: [[NEG:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[NEG]], ptr @sc, align 8 // CHECK: [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8 -// CHECK: [[NEG1:%.*]] = xor <16 x i8> [[TMP1]], +// CHECK: [[NEG1:%.*]] = xor <16 x i8> [[TMP1]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[NEG1]], ptr @uc, align 8 // CHECK: [[TMP2:%.*]] = load volatile <16 x i8>, ptr @bc2, align 8 -// CHECK: [[NEG2:%.*]] = xor <16 x i8> [[TMP2]], +// CHECK: [[NEG2:%.*]] = xor <16 x i8> [[TMP2]], splat (i8 -1) // CHECK: store volatile <16 x i8> [[NEG2]], ptr @bc, align 8 // CHECK: [[TMP3:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 -// CHECK: [[NEG3:%.*]] = xor <8 x i16> [[TMP3]], +// CHECK: [[NEG3:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[NEG3]], ptr @ss, align 8 // CHECK: [[TMP4:%.*]] = load volatile <8 x i16>, ptr @us2, align 8 -// CHECK: [[NEG4:%.*]] = xor <8 x i16> [[TMP4]], +// CHECK: [[NEG4:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[NEG4]], ptr @us, align 8 // CHECK: [[TMP5:%.*]] = load volatile <8 x i16>, ptr @bs2, align 8 -// CHECK: [[NEG5:%.*]] = xor <8 x i16> [[TMP5]], +// CHECK: [[NEG5:%.*]] = xor <8 x i16> [[TMP5]], splat (i16 -1) // CHECK: store volatile <8 x i16> [[NEG5]], ptr @bs, align 8 // CHECK: [[TMP6:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 -// CHECK: [[NEG6:%.*]] = xor <4 x i32> [[TMP6]], +// CHECK: [[NEG6:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[NEG6]], ptr @si, align 8 // CHECK: [[TMP7:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8 -// CHECK: [[NEG7:%.*]] = xor <4 x i32> [[TMP7]], +// CHECK: [[NEG7:%.*]] = xor <4 x i32> [[TMP7]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[NEG7]], ptr @ui, align 8 // CHECK: [[TMP8:%.*]] = load volatile <4 x i32>, ptr @bi2, align 8 -// CHECK: [[NEG8:%.*]] = xor <4 x i32> [[TMP8]], +// CHECK: [[NEG8:%.*]] = xor <4 x i32> [[TMP8]], splat (i32 -1) // CHECK: store volatile <4 x i32> [[NEG8]], ptr @bi, align 8 // CHECK: [[TMP9:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 -// CHECK: [[NEG9:%.*]] = xor <2 x i64> [[TMP9]], +// CHECK: [[NEG9:%.*]] = xor <2 x i64> [[TMP9]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[NEG9]], ptr @sl, align 8 // CHECK: [[TMP10:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8 -// CHECK: [[NEG10:%.*]] = xor <2 x i64> [[TMP10]], +// CHECK: [[NEG10:%.*]] = xor <2 x i64> [[TMP10]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[NEG10]], ptr @ul, align 8 // CHECK: [[TMP11:%.*]] = load volatile <2 x i64>, ptr @bl2, align 8 -// CHECK: [[NEG11:%.*]] = xor <2 x i64> [[TMP11]], +// CHECK: [[NEG11:%.*]] = xor <2 x i64> [[TMP11]], splat (i64 -1) // CHECK: store volatile <2 x i64> [[NEG11]], ptr @bl, align 8 // CHECK: ret void void test_not(void) { @@ -1932,7 +1932,7 @@ void test_xor_assign(void) { // CHECK: [[SHL2:%.*]] = shl <16 x i8> [[TMP4]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHL2]], ptr @sc, align 8 // CHECK: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8 -// CHECK: [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], +// CHECK: [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHL3]], ptr @sc, align 8 // CHECK: [[TMP7:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 // CHECK: [[TMP8:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 @@ -1950,7 +1950,7 @@ void test_xor_assign(void) { // CHECK: [[SHL9:%.*]] = shl <16 x i8> [[TMP11]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHL9]], ptr @uc, align 8 // CHECK: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 -// CHECK: [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], +// CHECK: [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHL10]], ptr @uc, align 8 // CHECK: [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 // CHECK: [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 @@ -1968,7 +1968,7 @@ void test_xor_assign(void) { // CHECK: [[SHL16:%.*]] = shl <8 x i16> [[TMP18]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHL16]], ptr @ss, align 8 // CHECK: [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 -// CHECK: [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], +// CHECK: [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHL17]], ptr @ss, align 8 // CHECK: [[TMP21:%.*]] = load volatile <8 x i16>, ptr @us, align 8 // CHECK: [[TMP22:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 @@ -1986,7 +1986,7 @@ void test_xor_assign(void) { // CHECK: [[SHL23:%.*]] = shl <8 x i16> [[TMP25]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHL23]], ptr @us, align 8 // CHECK: [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8 -// CHECK: [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], +// CHECK: [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHL24]], ptr @us, align 8 // CHECK: [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si, align 8 // CHECK: [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 @@ -2003,7 +2003,7 @@ void test_xor_assign(void) { // CHECK: [[SHL29:%.*]] = shl <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHL29]], ptr @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8 -// CHECK: [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], +// CHECK: [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHL30]], ptr @si, align 8 // CHECK: [[TMP35:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 // CHECK: [[TMP36:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 @@ -2020,7 +2020,7 @@ void test_xor_assign(void) { // CHECK: [[SHL35:%.*]] = shl <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHL35]], ptr @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 -// CHECK: [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], +// CHECK: [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHL36]], ptr @ui, align 8 // CHECK: [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 // CHECK: [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 @@ -2038,7 +2038,7 @@ void test_xor_assign(void) { // CHECK: [[SHL42:%.*]] = shl <2 x i64> [[TMP46]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHL42]], ptr @sl, align 8 // CHECK: [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 -// CHECK: [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], +// CHECK: [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHL43]], ptr @sl, align 8 // CHECK: [[TMP49:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 // CHECK: [[TMP50:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 @@ -2056,7 +2056,7 @@ void test_xor_assign(void) { // CHECK: [[SHL49:%.*]] = shl <2 x i64> [[TMP53]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHL49]], ptr @ul, align 8 // CHECK: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 -// CHECK: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], +// CHECK: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHL50]], ptr @ul, align 8 // CHECK: ret void void test_sl(void) { @@ -2115,7 +2115,7 @@ void test_sl(void) { // CHECK: [[SHL2:%.*]] = shl <16 x i8> [[TMP5]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHL2]], ptr @sc, align 8 // CHECK: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8 -// CHECK: [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], +// CHECK: [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHL3]], ptr @sc, align 8 // CHECK: [[TMP7:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 @@ -2133,7 +2133,7 @@ void test_sl(void) { // CHECK: [[SHL9:%.*]] = shl <16 x i8> [[TMP12]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHL9]], ptr @uc, align 8 // CHECK: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 -// CHECK: [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], +// CHECK: [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHL10]], ptr @uc, align 8 // CHECK: [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 // CHECK: [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 @@ -2151,7 +2151,7 @@ void test_sl(void) { // CHECK: [[SHL16:%.*]] = shl <8 x i16> [[TMP19]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHL16]], ptr @ss, align 8 // CHECK: [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 -// CHECK: [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], +// CHECK: [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHL17]], ptr @ss, align 8 // CHECK: [[TMP21:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 // CHECK: [[TMP22:%.*]] = load volatile <8 x i16>, ptr @us, align 8 @@ -2169,7 +2169,7 @@ void test_sl(void) { // CHECK: [[SHL23:%.*]] = shl <8 x i16> [[TMP26]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHL23]], ptr @us, align 8 // CHECK: [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8 -// CHECK: [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], +// CHECK: [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHL24]], ptr @us, align 8 // CHECK: [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 // CHECK: [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si, align 8 @@ -2186,7 +2186,7 @@ void test_sl(void) { // CHECK: [[SHL29:%.*]] = shl <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHL29]], ptr @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8 -// CHECK: [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], +// CHECK: [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHL30]], ptr @si, align 8 // CHECK: [[TMP35:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 // CHECK: [[TMP36:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 @@ -2203,7 +2203,7 @@ void test_sl(void) { // CHECK: [[SHL35:%.*]] = shl <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHL35]], ptr @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 -// CHECK: [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], +// CHECK: [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHL36]], ptr @ui, align 8 // CHECK: [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 // CHECK: [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 @@ -2221,7 +2221,7 @@ void test_sl(void) { // CHECK: [[SHL42:%.*]] = shl <2 x i64> [[TMP47]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHL42]], ptr @sl, align 8 // CHECK: [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 -// CHECK: [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], +// CHECK: [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHL43]], ptr @sl, align 8 // CHECK: [[TMP49:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 // CHECK: [[TMP50:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 @@ -2239,7 +2239,7 @@ void test_sl(void) { // CHECK: [[SHL49:%.*]] = shl <2 x i64> [[TMP54]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHL49]], ptr @ul, align 8 // CHECK: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 -// CHECK: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], +// CHECK: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHL50]], ptr @ul, align 8 // CHECK: ret void void test_sl_assign(void) { @@ -2298,7 +2298,7 @@ void test_sl_assign(void) { // CHECK: [[SHR2:%.*]] = ashr <16 x i8> [[TMP4]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHR2]], ptr @sc, align 8 // CHECK: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8 -// CHECK: [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], +// CHECK: [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHR3]], ptr @sc, align 8 // CHECK: [[TMP7:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 // CHECK: [[TMP8:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 @@ -2316,7 +2316,7 @@ void test_sl_assign(void) { // CHECK: [[SHR9:%.*]] = lshr <16 x i8> [[TMP11]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHR9]], ptr @uc, align 8 // CHECK: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 -// CHECK: [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], +// CHECK: [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHR10]], ptr @uc, align 8 // CHECK: [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 // CHECK: [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 @@ -2334,7 +2334,7 @@ void test_sl_assign(void) { // CHECK: [[SHR16:%.*]] = ashr <8 x i16> [[TMP18]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHR16]], ptr @ss, align 8 // CHECK: [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 -// CHECK: [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], +// CHECK: [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHR17]], ptr @ss, align 8 // CHECK: [[TMP21:%.*]] = load volatile <8 x i16>, ptr @us, align 8 // CHECK: [[TMP22:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 @@ -2352,7 +2352,7 @@ void test_sl_assign(void) { // CHECK: [[SHR23:%.*]] = lshr <8 x i16> [[TMP25]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHR23]], ptr @us, align 8 // CHECK: [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8 -// CHECK: [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], +// CHECK: [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHR24]], ptr @us, align 8 // CHECK: [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si, align 8 // CHECK: [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 @@ -2369,7 +2369,7 @@ void test_sl_assign(void) { // CHECK: [[SHR29:%.*]] = ashr <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHR29]], ptr @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8 -// CHECK: [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], +// CHECK: [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHR30]], ptr @si, align 8 // CHECK: [[TMP35:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 // CHECK: [[TMP36:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 @@ -2386,7 +2386,7 @@ void test_sl_assign(void) { // CHECK: [[SHR35:%.*]] = lshr <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHR35]], ptr @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 -// CHECK: [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], +// CHECK: [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHR36]], ptr @ui, align 8 // CHECK: [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 // CHECK: [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 @@ -2404,7 +2404,7 @@ void test_sl_assign(void) { // CHECK: [[SHR42:%.*]] = ashr <2 x i64> [[TMP46]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHR42]], ptr @sl, align 8 // CHECK: [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 -// CHECK: [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], +// CHECK: [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHR43]], ptr @sl, align 8 // CHECK: [[TMP49:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 // CHECK: [[TMP50:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 @@ -2422,7 +2422,7 @@ void test_sl_assign(void) { // CHECK: [[SHR49:%.*]] = lshr <2 x i64> [[TMP53]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHR49]], ptr @ul, align 8 // CHECK: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 -// CHECK: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], +// CHECK: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHR50]], ptr @ul, align 8 // CHECK: ret void void test_sr(void) { @@ -2481,7 +2481,7 @@ void test_sr(void) { // CHECK: [[SHR2:%.*]] = ashr <16 x i8> [[TMP5]], [[SH_PROM]] // CHECK: store volatile <16 x i8> [[SHR2]], ptr @sc, align 8 // CHECK: [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8 -// CHECK: [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], +// CHECK: [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHR3]], ptr @sc, align 8 // CHECK: [[TMP7:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8 // CHECK: [[TMP8:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 @@ -2499,7 +2499,7 @@ void test_sr(void) { // CHECK: [[SHR9:%.*]] = lshr <16 x i8> [[TMP12]], [[SH_PROM8]] // CHECK: store volatile <16 x i8> [[SHR9]], ptr @uc, align 8 // CHECK: [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8 -// CHECK: [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], +// CHECK: [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], splat (i8 5) // CHECK: store volatile <16 x i8> [[SHR10]], ptr @uc, align 8 // CHECK: [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 // CHECK: [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 @@ -2517,7 +2517,7 @@ void test_sr(void) { // CHECK: [[SHR16:%.*]] = ashr <8 x i16> [[TMP19]], [[SH_PROM15]] // CHECK: store volatile <8 x i16> [[SHR16]], ptr @ss, align 8 // CHECK: [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8 -// CHECK: [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], +// CHECK: [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHR17]], ptr @ss, align 8 // CHECK: [[TMP21:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8 // CHECK: [[TMP22:%.*]] = load volatile <8 x i16>, ptr @us, align 8 @@ -2535,7 +2535,7 @@ void test_sr(void) { // CHECK: [[SHR23:%.*]] = lshr <8 x i16> [[TMP26]], [[SH_PROM22]] // CHECK: store volatile <8 x i16> [[SHR23]], ptr @us, align 8 // CHECK: [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8 -// CHECK: [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], +// CHECK: [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], splat (i16 5) // CHECK: store volatile <8 x i16> [[SHR24]], ptr @us, align 8 // CHECK: [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 // CHECK: [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si, align 8 @@ -2552,7 +2552,7 @@ void test_sr(void) { // CHECK: [[SHR29:%.*]] = ashr <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]] // CHECK: store volatile <4 x i32> [[SHR29]], ptr @si, align 8 // CHECK: [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8 -// CHECK: [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], +// CHECK: [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHR30]], ptr @si, align 8 // CHECK: [[TMP35:%.*]] = load volatile <4 x i32>, ptr @si2, align 8 // CHECK: [[TMP36:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 @@ -2569,7 +2569,7 @@ void test_sr(void) { // CHECK: [[SHR35:%.*]] = lshr <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]] // CHECK: store volatile <4 x i32> [[SHR35]], ptr @ui, align 8 // CHECK: [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8 -// CHECK: [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], +// CHECK: [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], splat (i32 5) // CHECK: store volatile <4 x i32> [[SHR36]], ptr @ui, align 8 // CHECK: [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 // CHECK: [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 @@ -2587,7 +2587,7 @@ void test_sr(void) { // CHECK: [[SHR42:%.*]] = ashr <2 x i64> [[TMP47]], [[SH_PROM41]] // CHECK: store volatile <2 x i64> [[SHR42]], ptr @sl, align 8 // CHECK: [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8 -// CHECK: [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], +// CHECK: [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHR43]], ptr @sl, align 8 // CHECK: [[TMP49:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8 // CHECK: [[TMP50:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 @@ -2605,7 +2605,7 @@ void test_sr(void) { // CHECK: [[SHR49:%.*]] = lshr <2 x i64> [[TMP54]], [[SH_PROM48]] // CHECK: store volatile <2 x i64> [[SHR49]], ptr @ul, align 8 // CHECK: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8 -// CHECK: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], +// CHECK: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], splat (i64 5) // CHECK: store volatile <2 x i64> [[SHR50]], ptr @ul, align 8 // CHECK: ret void void test_sr_assign(void) { diff --git a/clang/test/CodeGen/SystemZ/zvector2.c b/clang/test/CodeGen/SystemZ/zvector2.c index 36cbf228feac86..5b036433519bdd 100644 --- a/clang/test/CodeGen/SystemZ/zvector2.c +++ b/clang/test/CodeGen/SystemZ/zvector2.c @@ -32,7 +32,7 @@ void test_preinc (void) { // CHECK-LABEL: test_preinc // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2 -// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], +// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float 1.000000e+00) ++ff2; } @@ -40,7 +40,7 @@ void test_postinc (void) { // CHECK-LABEL: test_postinc // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2 -// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], +// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float 1.000000e+00) ff2++; } @@ -48,7 +48,7 @@ void test_predec (void) { // CHECK-LABEL: test_predec // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2 -// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], +// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float -1.000000e+00) --ff2; } @@ -56,7 +56,7 @@ void test_postdec (void) { // CHECK-LABEL: test_postdec // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2 -// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], +// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float -1.000000e+00) ff2--; } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 4e56204c8ad40f..9ed6d47e8808a9 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -53,14 +53,14 @@ __m256 test_mm256_and_ps(__m256 A, __m256 B) { __m256d test_mm256_andnot_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_andnot_pd - // CHECK: xor <4 x i64> %{{.*}}, + // CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <4 x i64> return _mm256_andnot_pd(A, B); } __m256 test_mm256_andnot_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_andnot_ps - // CHECK: xor <8 x i32> %{{.*}}, + // CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <8 x i32> return _mm256_andnot_ps(A, B); } diff --git a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c index cd94edcf58ea2f..9935137ca86775 100644 --- a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c @@ -274,7 +274,7 @@ __m256bh test_mm256_loadu_pbh(void *p) { __m128bh test_mm_load_sbh(void const *A) { // CHECK-LABEL: test_mm_load_sbh - // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> to <8 x i1>), <8 x bfloat> %{{.*}}) + // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> splat (i8 1) to <8 x i1>), <8 x bfloat> %{{.*}}) return _mm_load_sbh(A); } diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index a4a3a08efad51b..27da56fb757230 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -869,18 +869,18 @@ __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) { __m256i test_mm256_mul_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mul_epi32 - // CHECK: shl <4 x i64> %{{.*}}, - // CHECK: ashr <4 x i64> %{{.*}}, - // CHECK: shl <4 x i64> %{{.*}}, - // CHECK: ashr <4 x i64> %{{.*}}, + // CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + // CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) + // CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + // CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) // CHECK: mul <4 x i64> %{{.*}}, %{{.*}} return _mm256_mul_epi32(a, b); } __m256i test_mm256_mul_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mul_epu32 - // CHECK: and <4 x i64> %{{.*}}, - // CHECK: and <4 x i64> %{{.*}}, + // CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) + // CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) // CHECK: mul <4 x i64> %{{.*}}, %{{.*}} return _mm256_mul_epu32(a, b); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index f82a5d17b292ac..1d18ca8548a3a3 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -7,14 +7,14 @@ __mmask32 test_knot_mask32(__mmask32 a) { // CHECK-LABEL: @test_knot_mask32 // CHECK: [[IN:%.*]] = bitcast i32 %{{.*}} to <32 x i1> - // CHECK: [[NOT:%.*]] = xor <32 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <32 x i1> [[IN]], splat (i1 true) return _knot_mask32(a); } __mmask64 test_knot_mask64(__mmask64 a) { // CHECK-LABEL: @test_knot_mask64 // CHECK: [[IN:%.*]] = bitcast i64 %{{.*}} to <64 x i1> - // CHECK: [[NOT:%.*]] = xor <64 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <64 x i1> [[IN]], splat (i1 true) return _knot_mask64(a); } @@ -42,7 +42,7 @@ __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: @test_kandn_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> - // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <32 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu16_mask(_kandn_mask32(_mm512_cmpneq_epu16_mask(__A, __B), _mm512_cmpneq_epu16_mask(__C, __D)), @@ -53,7 +53,7 @@ __mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: @test_kandn_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> - // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <64 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu8_mask(_kandn_mask64(_mm512_cmpneq_epu8_mask(__A, __B), _mm512_cmpneq_epu8_mask(__C, __D)), @@ -84,7 +84,7 @@ __mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: @test_kxnor_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> - // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <32 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu16_mask(_kxnor_mask32(_mm512_cmpneq_epu16_mask(__A, __B), _mm512_cmpneq_epu16_mask(__C, __D)), @@ -95,7 +95,7 @@ __mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: @test_kxnor_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> - // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <64 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu8_mask(_kxnor_mask64(_mm512_cmpneq_epu8_mask(__A, __B), _mm512_cmpneq_epu8_mask(__C, __D)), diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index b61c3eb3d54ad1..1ebd3696bce54f 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -6,7 +6,7 @@ __mmask8 test_knot_mask8(__mmask8 a) { // CHECK-LABEL: @test_knot_mask8 // CHECK: [[IN:%.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: [[NOT:%.*]] = xor <8 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <8 x i1> [[IN]], splat (i1 true) return _knot_mask8(a); } @@ -24,7 +24,7 @@ __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ // CHECK-LABEL: @test_kandn_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <8 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu64_mask(_kandn_mask8(_mm512_cmpneq_epu64_mask(__A, __B), _mm512_cmpneq_epu64_mask(__C, __D)), @@ -45,7 +45,7 @@ __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ // CHECK-LABEL: @test_kxnor_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> - // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <8 x i1> [[NOT]], [[RHS]] return _mm512_mask_cmpneq_epu64_mask(_kxnor_mask8(_mm512_cmpneq_epu64_mask(__A, __B), _mm512_cmpneq_epu64_mask(__C, __D)), @@ -388,14 +388,14 @@ __m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { __m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_andnot_pd - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> return (__m512d) _mm512_andnot_pd(__A, __B); } __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_andnot_pd - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_mask_andnot_pd(__W, __U, __A, __B); @@ -403,7 +403,7 @@ __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m51 __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_andnot_pd - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_maskz_andnot_pd(__U, __A, __B); @@ -411,14 +411,14 @@ __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { __m512 test_mm512_andnot_ps (__m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_andnot_ps - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> return (__m512) _mm512_andnot_ps(__A, __B); } __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_andnot_ps - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_mask_andnot_ps(__W, __U, __A, __B); @@ -426,7 +426,7 @@ __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_maskz_andnot_ps - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_maskz_andnot_ps(__U, __A, __B); diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 372790a8cd668b..84e700cfbd3785 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -440,7 +440,7 @@ __mmask16 test_mm512_knot(__mmask16 a) { // CHECK-LABEL: test_mm512_knot // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], splat (i1 true) // CHECK: bitcast <16 x i1> [[NOT]] to i16 return _mm512_knot(a); } @@ -2844,7 +2844,7 @@ __m512i test_mm512_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __ __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){ // CHECK-LABEL: test_mm512_maskz_andnot_epi32 - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_andnot_epi32(__k,__A,__B); @@ -2853,7 +2853,7 @@ __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){ __m512i test_mm512_mask_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { // CHECK-LABEL: test_mm512_mask_andnot_epi32 - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_andnot_epi32(__src,__k,__A,__B); @@ -2863,7 +2863,7 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_andnot_si512 //CHECK: load {{.*}}%__A.addr.i, align 64 - //CHECK: %not.i = xor{{.*}}, + //CHECK: %not.i = xor{{.*}}, splat (i64 -1) //CHECK: load {{.*}}%__B.addr.i, align 64 //CHECK: and <8 x i64> %not.i,{{.*}} @@ -2872,14 +2872,14 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B) __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_andnot_epi32 - // CHECK: xor <16 x i32> %{{.*}}, + // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <16 x i32> %{{.*}}, %{{.*}} return _mm512_andnot_epi32(__A,__B); } __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_andnot_epi64 - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_andnot_epi64(__k,__A,__B); @@ -2888,7 +2888,7 @@ __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_andnot_epi64 - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_andnot_epi64(__src,__k,__A,__B); @@ -2896,7 +2896,7 @@ __m512i test_mm512_mask_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i test_mm512_andnot_epi64(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_andnot_epi64 - // CHECK: xor <8 x i64> %{{.*}}, + // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <8 x i64> %{{.*}}, %{{.*}} return _mm512_andnot_epi64(__A,__B); } @@ -2987,20 +2987,20 @@ __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) { __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mul_epi32 - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} return _mm512_mul_epi32(__A,__B); } __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_mul_epi32 - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_mul_epi32(__k,__A,__B); @@ -3008,10 +3008,10 @@ __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_mul_epi32 - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, - //CHECK: shl <8 x i64> %{{.*}}, - //CHECK: ashr <8 x i64> %{{.*}}, + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_mul_epi32(__src,__k,__A,__B); @@ -3019,16 +3019,16 @@ __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512 __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mul_epu32 - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: and <8 x i64> %{{.*}}, + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} return _mm512_mul_epu32(__A,__B); } __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_mul_epu32 - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: and <8 x i64> %{{.*}}, + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_mul_epu32(__k,__A,__B); @@ -3036,8 +3036,8 @@ __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_mul_epu32 - //CHECK: and <8 x i64> %{{.*}}, - //CHECK: and <8 x i64> %{{.*}}, + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_mul_epu32(__src,__k,__A,__B); @@ -8299,7 +8299,7 @@ __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ // CHECK-LABEL: test_mm512_kandn // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B), @@ -8405,7 +8405,7 @@ __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ // CHECK-LABEL: test_mm512_kxnor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kxnor(_mm512_cmpneq_epu32_mask(__A, __B), @@ -8427,7 +8427,7 @@ __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __mmask16 test_knot_mask16(__mmask16 a) { // CHECK-LABEL: test_knot_mask16 // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], splat (i1 true) // CHECK: bitcast <16 x i1> [[NOT]] to i16 return _knot_mask16(a); } @@ -8447,7 +8447,7 @@ __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: test_kandn_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kandn_mask16(_mm512_cmpneq_epu32_mask(__A, __B), @@ -8470,7 +8470,7 @@ __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, // CHECK-LABEL: test_kxnor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> - // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true) // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kxnor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), diff --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c index 998177b1e72e54..1e804c3db57d61 100644 --- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c @@ -88,121 +88,121 @@ __m512i test_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const* __P) { __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shldi_epi64 - // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47)) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 47); } __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shldi_epi64 - // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63)) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shldi_epi64(__U, __A, __B, 63); } __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shldi_epi64 - // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shldi_epi64(__A, __B, 31); } __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shldi_epi32 - // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7)) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 7); } __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shldi_epi32 - // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15)) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shldi_epi32(__U, __A, __B, 15); } __m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shldi_epi32 - // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shldi_epi32(__A, __B, 31); } __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shldi_epi16 - // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3)) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 3); } __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shldi_epi16 - // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 7)) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shldi_epi16(__U, __A, __B, 7); } __m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shldi_epi16 - // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15)) return _mm512_shldi_epi16(__A, __B, 15); } __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shrdi_epi64 - // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47)) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 47); } __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shrdi_epi64 - // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63)) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63); } __m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shrdi_epi64 - // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> ) + // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shrdi_epi64(__A, __B, 31); } __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shrdi_epi32 - // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7)) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 7); } __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shrdi_epi32 - // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15)) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shrdi_epi32(__U, __A, __B, 15); } __m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shrdi_epi32 - // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> ) + // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shrdi_epi32(__A, __B, 31); } __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shrdi_epi16 - // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3)) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 3); } __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shrdi_epi16 - // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15)) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shrdi_epi16(__U, __A, __B, 15); } __m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shrdi_epi16 - // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> ) + // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31)) return _mm512_shrdi_epi16(__A, __B, 31); } diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 6f544c21e798de..1c2d467a474288 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -726,10 +726,10 @@ __m128i test_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_mask_mul_epi32 - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, + //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_mul_epi32(__W, __M, __X, __Y); @@ -737,10 +737,10 @@ __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_maskz_mul_epi32 - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, - //CHECK: shl <4 x i64> %{{.*}}, - //CHECK: ashr <4 x i64> %{{.*}}, + //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_mul_epi32(__M, __X, __Y); @@ -750,10 +750,10 @@ __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) { __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_mask_mul_epi32 - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, + //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_mul_epi32(__W, __M, __X, __Y); @@ -761,10 +761,10 @@ __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_maskz_mul_epi32 - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, - //CHECK: shl <2 x i64> %{{.*}}, - //CHECK: ashr <2 x i64> %{{.*}}, + //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_mul_epi32(__M, __X, __Y); @@ -773,8 +773,8 @@ __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) { __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_mask_mul_epu32 - //CHECK: and <4 x i64> %{{.*}}, - //CHECK: and <4 x i64> %{{.*}}, + //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_mul_epu32(__W, __M, __X, __Y); @@ -782,8 +782,8 @@ __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: @test_mm256_maskz_mul_epu32 - //CHECK: and <4 x i64> %{{.*}}, - //CHECK: and <4 x i64> %{{.*}}, + //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_mul_epu32(__M, __X, __Y); @@ -792,8 +792,8 @@ __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) { __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_mask_mul_epu32 - //CHECK: and <2 x i64> %{{.*}}, - //CHECK: and <2 x i64> %{{.*}}, + //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_mul_epu32(__W, __M, __X, __Y); @@ -801,8 +801,8 @@ __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, __m128i test_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) { //CHECK-LABEL: @test_mm_maskz_mul_epu32 - //CHECK: and <2 x i64> %{{.*}}, - //CHECK: and <2 x i64> %{{.*}}, + //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) + //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) //CHECK: mul <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_mul_epu32(__M, __X, __Y); @@ -880,14 +880,14 @@ __m128i test_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { __m256i test_mm256_andnot_epi32 (__m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_andnot_epi32 - //CHECK: xor <8 x i32> %{{.*}}, + //CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <8 x i32> %{{.*}}, %{{.*}} return _mm256_andnot_epi32(__A, __B); } __m256i test_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_mask_andnot_epi32 - //CHECK: xor <8 x i32> %{{.*}}, + //CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <8 x i32> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_andnot_epi32(__W, __U, __A, __B); @@ -895,7 +895,7 @@ __m256i test_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __ __m256i test_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_maskz_andnot_epi32 - //CHECK: xor <8 x i32> %{{.*}}, + //CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <8 x i32> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_andnot_epi32(__U, __A, __B); @@ -903,14 +903,14 @@ __m256i test_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { __m128i test_mm_andnot_epi32 (__m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_andnot_epi32 - //CHECK: xor <4 x i32> %{{.*}}, + //CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <4 x i32> %{{.*}}, %{{.*}} return _mm_andnot_epi32(__A, __B); } __m128i test_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_mask_andnot_epi32 - //CHECK: xor <4 x i32> %{{.*}}, + //CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <4 x i32> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_andnot_epi32(__W, __U, __A, __B); @@ -918,7 +918,7 @@ __m128i test_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m12 __m128i test_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_maskz_andnot_epi32 - //CHECK: xor <4 x i32> %{{.*}}, + //CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) //CHECK: and <4 x i32> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_andnot_epi32(__U, __A, __B); @@ -1046,14 +1046,14 @@ __m128i test_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { __m256i test_mm256_andnot_epi64 (__m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_andnot_epi64 - //CHECK: xor <4 x i64> %{{.*}}, + //CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <4 x i64> %{{.*}}, %{{.*}} return _mm256_andnot_epi64(__A, __B); } __m256i test_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_mask_andnot_epi64 - //CHECK: xor <4 x i64> %{{.*}}, + //CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_andnot_epi64(__W, __U, __A, __B); @@ -1061,7 +1061,7 @@ __m256i test_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __ __m256i test_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: @test_mm256_maskz_andnot_epi64 - //CHECK: xor <4 x i64> %{{.*}}, + //CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <4 x i64> %{{.*}}, %{{.*}} //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_andnot_epi64(__U, __A, __B); @@ -1069,14 +1069,14 @@ __m256i test_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { __m128i test_mm_andnot_epi64 (__m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_andnot_epi64 - //CHECK: xor <2 x i64> %{{.*}}, + //CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <2 x i64> %{{.*}}, %{{.*}} return _mm_andnot_epi64(__A, __B); } __m128i test_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_mask_andnot_epi64 - //CHECK: xor <2 x i64> %{{.*}}, + //CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_andnot_epi64(__W,__U, __A, __B); @@ -1084,7 +1084,7 @@ __m128i test_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m12 __m128i test_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: @test_mm_maskz_andnot_epi64 - //CHECK: xor <2 x i64> %{{.*}}, + //CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) //CHECK: and <2 x i64> %{{.*}}, %{{.*}} //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_andnot_epi64(__U, __A, __B); diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c index f705bf7aa16f86..cdbd19a91211b6 100644 --- a/clang/test/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c @@ -45,7 +45,7 @@ __m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_mask_andnot_pd - // CHECK: xor <4 x i64> %{{.*}}, + // CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <4 x i64> %{{.*}}, %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_mask_andnot_pd ( __W, __U, __A, __B); @@ -53,7 +53,7 @@ __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m25 __m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_maskz_andnot_pd - // CHECK: xor <4 x i64> %{{.*}}, + // CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <4 x i64> %{{.*}}, %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_maskz_andnot_pd (__U, __A, __B); @@ -61,7 +61,7 @@ __m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { __m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_andnot_pd - // CHECK: xor <2 x i64> %{{.*}}, + // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <2 x i64> %{{.*}}, %{{.*}} // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_mask_andnot_pd ( __W, __U, __A, __B); @@ -69,7 +69,7 @@ __m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_maskz_andnot_pd - // CHECK: xor <2 x i64> %{{.*}}, + // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <2 x i64> %{{.*}}, %{{.*}} // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_maskz_andnot_pd (__U, __A, __B); @@ -77,7 +77,7 @@ __m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { __m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_mask_andnot_ps - // CHECK: xor <8 x i32> %{{.*}}, + // CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <8 x i32> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_mask_andnot_ps ( __W, __U, __A, __B); @@ -85,7 +85,7 @@ __m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 _ __m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_maskz_andnot_ps - // CHECK: xor <8 x i32> %{{.*}}, + // CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <8 x i32> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_maskz_andnot_ps (__U, __A, __B); @@ -93,7 +93,7 @@ __m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { __m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_andnot_ps - // CHECK: xor <4 x i32> %{{.*}}, + // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <4 x i32> %{{.*}}, %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_mask_andnot_ps ( __W, __U, __A, __B); @@ -101,7 +101,7 @@ __m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) __m128 test_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_maskz_andnot_ps - // CHECK: xor <4 x i32> %{{.*}}, + // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <4 x i32> %{{.*}}, %{{.*}} // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_maskz_andnot_ps (__U, __A, __B); diff --git a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c index a569293ddffc13..5760c71790debc 100644 --- a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c @@ -172,241 +172,241 @@ __m256i test_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const* __P) { __m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shldi_epi64 - // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 47)) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 47); } __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shldi_epi64 - // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 63)) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shldi_epi64(__U, __A, __B, 63); } __m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shldi_epi64 - // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31)) return _mm256_shldi_epi64(__A, __B, 31); } __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shldi_epi64 - // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 47)) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shldi_epi64(__S, __U, __A, __B, 47); } __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shldi_epi64 - // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 63)) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shldi_epi64(__U, __A, __B, 63); } __m128i test_mm_shldi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shldi_epi64 - // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shldi_epi64(__A, __B, 31); } __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shldi_epi32 - // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 7)) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 7); } __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shldi_epi32 - // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 15)) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shldi_epi32(__U, __A, __B, 15); } __m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shldi_epi32 - // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31)) return _mm256_shldi_epi32(__A, __B, 31); } __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shldi_epi32 - // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 7)) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shldi_epi32(__S, __U, __A, __B, 7); } __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shldi_epi32 - // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 15)) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shldi_epi32(__U, __A, __B, 15); } __m128i test_mm_shldi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shldi_epi32 - // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shldi_epi32(__A, __B, 31); } __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shldi_epi16 - // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 3)) // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 3); } __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shldi_epi16 - // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 7)) // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shldi_epi16(__U, __A, __B, 7); } __m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shldi_epi16 - // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shldi_epi16(__A, __B, 31); } __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shldi_epi16 - // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 3)) // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shldi_epi16(__S, __U, __A, __B, 3); } __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shldi_epi16 - // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 7)) // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shldi_epi16(__U, __A, __B, 7); } __m128i test_mm_shldi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shldi_epi16 - // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shldi_epi16(__A, __B, 31); } __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shrdi_epi64 - // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 47)) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shrdi_epi64(__S, __U, __A, __B, 47); } __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shrdi_epi64 - // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 63)) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shrdi_epi64(__U, __A, __B, 63); } __m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shrdi_epi64 - // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> ) + // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31) return _mm256_shrdi_epi64(__A, __B, 31); } __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shrdi_epi64 - // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 47)) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shrdi_epi64(__S, __U, __A, __B, 47); } __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shrdi_epi64 - // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 63)) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shrdi_epi64(__U, __A, __B, 63); } __m128i test_mm_shrdi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shrdi_epi64 - // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shrdi_epi64(__A, __B, 31); } __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shrdi_epi32 - // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 7)) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 7); } __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shrdi_epi32 - // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 15)) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shrdi_epi32(__U, __A, __B, 15); } __m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shrdi_epi32 - // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> ) + // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31) return _mm256_shrdi_epi32(__A, __B, 31); } __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shrdi_epi32 - // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 7)) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shrdi_epi32(__S, __U, __A, __B, 7); } __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shrdi_epi32 - // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 15)) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shrdi_epi32(__U, __A, __B, 15); } __m128i test_mm_shrdi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shrdi_epi32 - // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shrdi_epi32(__A, __B, 31); } __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shrdi_epi16 - // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 3)) // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 3); } __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shrdi_epi16 - // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> ) + // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 7)) // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shrdi_epi16(__U, __A, __B, 7); } __m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shrdi_epi16 - // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> + // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shrdi_epi16(__A, __B, 31); } __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shrdi_epi16 - // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 3)) // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shrdi_epi16(__S, __U, __A, __B, 3); } __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shrdi_epi16 - // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 7)) // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shrdi_epi16(__U, __A, __B, 7); } __m128i test_mm_shrdi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shrdi_epi16 - // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shrdi_epi16(__A, __B, 31); } diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 280faa0274cb85..2bb318177e4d59 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -86,7 +86,7 @@ __m64 test_mm_and_si64(__m64 a, __m64 b) { __m64 test_mm_andnot_si64(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_andnot_si64 - // CHECK: [[TMP:%.*]] = xor <1 x i64> {{%.*}}, + // CHECK: [[TMP:%.*]] = xor <1 x i64> {{%.*}}, splat (i64 -1) // CHECK: and <1 x i64> [[TMP]], {{%.*}} return _mm_andnot_si64(a, b); } @@ -333,8 +333,8 @@ int test_mm_movemask_pi8(__m64 a) { __m64 test_mm_mul_su32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mul_su32 - // CHECK: and <2 x i64> {{%.*}}, - // CHECK: and <2 x i64> {{%.*}}, + // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295) + // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295) // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_su32(a, b); } diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index f779ab07a26640..104bfea05469e8 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -35,7 +35,7 @@ TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m1 __m128 test_mm_andnot_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_andnot_ps - // CHECK: xor <4 x i32> %{{.*}}, + // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1) // CHECK: and <4 x i32> return _mm_andnot_ps(A, B); } diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index c4493a49120543..adcb6876f7f55d 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -99,7 +99,7 @@ __m128i test_mm_and_si128(__m128i A, __m128i B) { __m128d test_mm_andnot_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_andnot_pd - // CHECK: xor <2 x i64> %{{.*}}, + // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <2 x i64> return _mm_andnot_pd(A, B); } @@ -107,7 +107,7 @@ TEST_CONSTEXPR(match_m128d(_mm_andnot_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, __m128i test_mm_andnot_si128(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_andnot_si128 - // CHECK: xor <2 x i64> %{{.*}}, + // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK: and <2 x i64> return _mm_andnot_si128(A, B); } @@ -906,8 +906,8 @@ int test_mm_movemask_pd(__m128d A) { __m128i test_mm_mul_epu32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_mul_epu32 - // CHECK: and <2 x i64> %{{.*}}, - // CHECK: and <2 x i64> %{{.*}}, + // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) + // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295) // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_epu32(A, B); } diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 340874a4fdd3c4..d71a4b7e789ef8 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -312,10 +312,10 @@ __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) { __m128i test_mm_mul_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_mul_epi32 - // CHECK: shl <2 x i64> %{{.*}}, - // CHECK: ashr <2 x i64> %{{.*}}, - // CHECK: shl <2 x i64> %{{.*}}, - // CHECK: ashr <2 x i64> %{{.*}}, + // CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + // CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) + // CHECK: shl <2 x i64> %{{.*}}, splat (i64 32) + // CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32) // CHECK: mul <2 x i64> %{{.*}}, %{{.*}} return _mm_mul_epi32(x, y); } diff --git a/clang/test/CodeGen/X86/xop-builtins-cmp.c b/clang/test/CodeGen/X86/xop-builtins-cmp.c index af7bc6ce25c4e4..76413df5c8a201 100644 --- a/clang/test/CodeGen/X86/xop-builtins-cmp.c +++ b/clang/test/CodeGen/X86/xop-builtins-cmp.c @@ -408,48 +408,48 @@ __m128i test_mm_comfalse_epi64(__m128i a, __m128i b) { __m128i test_mm_comtrue_epu8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu8 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epu8(a, b); } __m128i test_mm_comtrue_epu16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu16 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epu16(a, b); } __m128i test_mm_comtrue_epu32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu32 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epu32(a, b); } __m128i test_mm_comtrue_epu64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epu64 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epu64(a, b); } __m128i test_mm_comtrue_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi8 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epi8(a, b); } __m128i test_mm_comtrue_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi16 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epi16(a, b); } __m128i test_mm_comtrue_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi32 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epi32(a, b); } __m128i test_mm_comtrue_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_comtrue_epi64 - // CHECK: ret <2 x i64> + // CHECK: ret <2 x i64> splat (i64 -1) return _mm_comtrue_epi64(a, b); } diff --git a/clang/test/CodeGen/X86/xop-builtins.c b/clang/test/CodeGen/X86/xop-builtins.c index 113af58a69339d..8ba6b8bc5915b1 100644 --- a/clang/test/CodeGen/X86/xop-builtins.c +++ b/clang/test/CodeGen/X86/xop-builtins.c @@ -173,7 +173,7 @@ __m128i test_mm_hsubq_epi32(__m128i a) { __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) { // CHECK-LABEL: test_mm_cmov_si128 // CHECK: [[AND:%.*]] = and <2 x i64> %{{.*}}, %{{.*}} - // CHECK: [[NEG:%.*]] = xor <2 x i64> %{{.*}}, + // CHECK: [[NEG:%.*]] = xor <2 x i64> %{{.*}}, splat (i64 -1) // CHECK-NEXT: [[ANDN:%.*]] = and <2 x i64> %{{.*}}, [[NEG]] // CHECK-NEXT: %{{.*}} = or <2 x i64> [[AND]], [[ANDN]] return _mm_cmov_si128(a, b, c); @@ -182,7 +182,7 @@ __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) { __m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) { // CHECK-LABEL: test_mm256_cmov_si256 // CHECK: [[AND:%.*]] = and <4 x i64> %{{.*}}, %{{.*}} - // CHECK: [[NEG:%.*]] = xor <4 x i64> %{{.*}}, + // CHECK: [[NEG:%.*]] = xor <4 x i64> %{{.*}}, splat (i64 -1) // CHECK-NEXT: [[ANDN:%.*]] = and <4 x i64> %{{.*}}, [[NEG]] // CHECK-NEXT: %{{.*}} = or <4 x i64> [[AND]], [[ANDN]] return _mm256_cmov_si256(a, b, c); @@ -220,25 +220,25 @@ __m128i test_mm_rot_epi64(__m128i a, __m128i b) { __m128i test_mm_roti_epi8(__m128i a) { // CHECK-LABEL: test_mm_roti_epi8 - // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> ) + // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> splat (i8 1)) return _mm_roti_epi8(a, 1); } __m128i test_mm_roti_epi16(__m128i a) { // CHECK-LABEL: test_mm_roti_epi16 - // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> ) + // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 50)) return _mm_roti_epi16(a, 50); } __m128i test_mm_roti_epi32(__m128i a) { // CHECK-LABEL: test_mm_roti_epi32 - // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> ) + // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 226)) return _mm_roti_epi32(a, -30); } __m128i test_mm_roti_epi64(__m128i a) { // CHECK-LABEL: test_mm_roti_epi64 - // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> ) + // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 100)) return _mm_roti_epi64(a, 100); } diff --git a/clang/test/CodeGen/aarch64-neon-3v.c b/clang/test/CodeGen/aarch64-neon-3v.c index 9ed439379722bc..3580f6d38dc36a 100644 --- a/clang/test/CodeGen/aarch64-neon-3v.c +++ b/clang/test/CodeGen/aarch64-neon-3v.c @@ -341,7 +341,7 @@ uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: define{{.*}} <8 x i8> @test_vbic_s8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { @@ -349,7 +349,7 @@ int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { } // CHECK-LABEL: define{{.*}} <16 x i8> @test_vbicq_s8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { @@ -357,7 +357,7 @@ int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { } // CHECK-LABEL: define{{.*}} <4 x i16> @test_vbic_s16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { @@ -365,7 +365,7 @@ int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { } // CHECK-LABEL: define{{.*}} <8 x i16> @test_vbicq_s16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { @@ -373,7 +373,7 @@ int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { } // CHECK-LABEL: define{{.*}} <2 x i32> @test_vbic_s32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { @@ -381,7 +381,7 @@ int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: define{{.*}} <4 x i32> @test_vbicq_s32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { @@ -389,7 +389,7 @@ int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbic_s64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { @@ -397,7 +397,7 @@ int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbicq_s64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { @@ -405,7 +405,7 @@ int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: define{{.*}} <8 x i8> @test_vbic_u8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { @@ -413,7 +413,7 @@ uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { } // CHECK-LABEL: define{{.*}} <16 x i8> @test_vbicq_u8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { @@ -421,7 +421,7 @@ uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { } // CHECK-LABEL: define{{.*}} <4 x i16> @test_vbic_u16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { @@ -429,7 +429,7 @@ uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { } // CHECK-LABEL: define{{.*}} <8 x i16> @test_vbicq_u16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { @@ -437,7 +437,7 @@ uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { } // CHECK-LABEL: define{{.*}} <2 x i32> @test_vbic_u32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { @@ -445,7 +445,7 @@ uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: define{{.*}} <4 x i32> @test_vbicq_u32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { @@ -453,7 +453,7 @@ uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbic_u64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { @@ -461,7 +461,7 @@ uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbicq_u64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { @@ -469,7 +469,7 @@ uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: define{{.*}} <8 x i8> @test_vorn_s8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { @@ -477,7 +477,7 @@ int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { } // CHECK-LABEL: define{{.*}} <16 x i8> @test_vornq_s8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { @@ -485,7 +485,7 @@ int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { } // CHECK-LABEL: define{{.*}} <4 x i16> @test_vorn_s16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { @@ -493,7 +493,7 @@ int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { } // CHECK-LABEL: define{{.*}} <8 x i16> @test_vornq_s16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { @@ -501,7 +501,7 @@ int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { } // CHECK-LABEL: define{{.*}} <2 x i32> @test_vorn_s32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { @@ -509,7 +509,7 @@ int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: define{{.*}} <4 x i32> @test_vornq_s32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { @@ -517,7 +517,7 @@ int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: define{{.*}} <1 x i64> @test_vorn_s64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { @@ -525,7 +525,7 @@ int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: define{{.*}} <2 x i64> @test_vornq_s64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { @@ -533,7 +533,7 @@ int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: define{{.*}} <8 x i8> @test_vorn_u8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { @@ -541,7 +541,7 @@ uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { } // CHECK-LABEL: define{{.*}} <16 x i8> @test_vornq_u8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { @@ -549,7 +549,7 @@ uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { } // CHECK-LABEL: define{{.*}} <4 x i16> @test_vorn_u16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { @@ -557,7 +557,7 @@ uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { } // CHECK-LABEL: define{{.*}} <8 x i16> @test_vornq_u16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { @@ -565,7 +565,7 @@ uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { } // CHECK-LABEL: define{{.*}} <2 x i32> @test_vorn_u32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { @@ -573,7 +573,7 @@ uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: define{{.*}} <4 x i32> @test_vornq_u32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { @@ -581,7 +581,7 @@ uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: define{{.*}} <1 x i64> @test_vorn_u64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { @@ -589,7 +589,7 @@ uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: define{{.*}} <2 x i64> @test_vornq_u64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) { diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index ef1623bb17003a..271ae056308d2f 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -960,7 +960,7 @@ float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { // CHECK-LABEL: @test_vbsl_s8( // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <8 x i8> [[VBSL2_I]] @@ -973,7 +973,7 @@ int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> @@ -987,7 +987,7 @@ int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i32> [[VBSL5_I]] @@ -1000,7 +1000,7 @@ int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <1 x i64> [[VBSL5_I]] @@ -1010,7 +1010,7 @@ int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) { // CHECK-LABEL: @test_vbsl_u8( // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <8 x i8> [[VBSL2_I]] @@ -1023,7 +1023,7 @@ uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <4 x i16> [[VBSL5_I]] @@ -1036,7 +1036,7 @@ uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i32> [[VBSL5_I]] @@ -1049,7 +1049,7 @@ uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <1 x i64> [[VBSL5_I]] @@ -1064,7 +1064,7 @@ uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]] -// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, +// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> @@ -1080,7 +1080,7 @@ float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) { // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double> @@ -1091,7 +1091,7 @@ float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) { // CHECK-LABEL: @test_vbsl_p8( // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <8 x i8> [[VBSL2_I]] @@ -1104,7 +1104,7 @@ poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <4 x i16> [[VBSL5_I]] @@ -1114,7 +1114,7 @@ poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { // CHECK-LABEL: @test_vbslq_s8( // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <16 x i8> [[VBSL2_I]] @@ -1127,7 +1127,7 @@ int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <8 x i16> [[VBSL5_I]] @@ -1140,7 +1140,7 @@ int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <4 x i32> [[VBSL5_I]] @@ -1153,7 +1153,7 @@ int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i64> [[VBSL5_I]] @@ -1163,7 +1163,7 @@ int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) { // CHECK-LABEL: @test_vbslq_u8( // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <16 x i8> [[VBSL2_I]] @@ -1176,7 +1176,7 @@ uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <8 x i16> [[VBSL5_I]] @@ -1189,7 +1189,7 @@ uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <4 x i32> [[VBSL5_I]] @@ -1202,7 +1202,7 @@ int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i64> [[VBSL5_I]] @@ -1217,7 +1217,7 @@ uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, +// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1) // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float> @@ -1228,7 +1228,7 @@ float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { // CHECK-LABEL: @test_vbslq_p8( // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 -// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, +// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1) // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] // CHECK: ret <16 x i8> [[VBSL2_I]] @@ -1241,7 +1241,7 @@ poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 -// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, +// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1) // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <8 x i16> [[VBSL5_I]] @@ -1256,7 +1256,7 @@ poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]] -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double> @@ -4600,7 +4600,7 @@ float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) { } // CHECK-LABEL: @test_vshl_n_s8( -// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3) // CHECK: ret <8 x i8> [[VSHL_N]] int8x8_t test_vshl_n_s8(int8x8_t a) { return vshl_n_s8(a, 3); @@ -4609,7 +4609,7 @@ int8x8_t test_vshl_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vshl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <4 x i16> [[VSHL_N]] int16x4_t test_vshl_n_s16(int16x4_t a) { return vshl_n_s16(a, 3); @@ -4618,14 +4618,14 @@ int16x4_t test_vshl_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vshl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <2 x i32> [[VSHL_N]] int32x2_t test_vshl_n_s32(int32x2_t a) { return vshl_n_s32(a, 3); } // CHECK-LABEL: @test_vshlq_n_s8( -// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3) // CHECK: ret <16 x i8> [[VSHL_N]] int8x16_t test_vshlq_n_s8(int8x16_t a) { return vshlq_n_s8(a, 3); @@ -4634,7 +4634,7 @@ int8x16_t test_vshlq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vshlq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHL_N]] int16x8_t test_vshlq_n_s16(int16x8_t a) { return vshlq_n_s16(a, 3); @@ -4643,7 +4643,7 @@ int16x8_t test_vshlq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshlq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <4 x i32> [[VSHL_N]] int32x4_t test_vshlq_n_s32(int32x4_t a) { return vshlq_n_s32(a, 3); @@ -4652,14 +4652,14 @@ int32x4_t test_vshlq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshlq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3) // CHECK: ret <2 x i64> [[VSHL_N]] int64x2_t test_vshlq_n_s64(int64x2_t a) { return vshlq_n_s64(a, 3); } // CHECK-LABEL: @test_vshl_n_u8( -// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3) // CHECK: ret <8 x i8> [[VSHL_N]] uint8x8_t test_vshl_n_u8(uint8x8_t a) { return vshl_n_u8(a, 3); @@ -4668,7 +4668,7 @@ uint8x8_t test_vshl_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vshl_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <4 x i16> [[VSHL_N]] uint16x4_t test_vshl_n_u16(uint16x4_t a) { return vshl_n_u16(a, 3); @@ -4677,14 +4677,14 @@ uint16x4_t test_vshl_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vshl_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <2 x i32> [[VSHL_N]] uint32x2_t test_vshl_n_u32(uint32x2_t a) { return vshl_n_u32(a, 3); } // CHECK-LABEL: @test_vshlq_n_u8( -// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3) // CHECK: ret <16 x i8> [[VSHL_N]] uint8x16_t test_vshlq_n_u8(uint8x16_t a) { return vshlq_n_u8(a, 3); @@ -4693,7 +4693,7 @@ uint8x16_t test_vshlq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vshlq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHL_N]] uint16x8_t test_vshlq_n_u16(uint16x8_t a) { return vshlq_n_u16(a, 3); @@ -4702,7 +4702,7 @@ uint16x8_t test_vshlq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshlq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <4 x i32> [[VSHL_N]] uint32x4_t test_vshlq_n_u32(uint32x4_t a) { return vshlq_n_u32(a, 3); @@ -4711,14 +4711,14 @@ uint32x4_t test_vshlq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshlq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3) // CHECK: ret <2 x i64> [[VSHL_N]] uint64x2_t test_vshlq_n_u64(uint64x2_t a) { return vshlq_n_u64(a, 3); } // CHECK-LABEL: @test_vshr_n_s8( -// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, splat (i8 3) // CHECK: ret <8 x i8> [[VSHR_N]] int8x8_t test_vshr_n_s8(int8x8_t a) { return vshr_n_s8(a, 3); @@ -4727,7 +4727,7 @@ int8x8_t test_vshr_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <4 x i16> [[VSHR_N]] int16x4_t test_vshr_n_s16(int16x4_t a) { return vshr_n_s16(a, 3); @@ -4736,14 +4736,14 @@ int16x4_t test_vshr_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <2 x i32> [[VSHR_N]] int32x2_t test_vshr_n_s32(int32x2_t a) { return vshr_n_s32(a, 3); } // CHECK-LABEL: @test_vshrq_n_s8( -// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, splat (i8 3) // CHECK: ret <16 x i8> [[VSHR_N]] int8x16_t test_vshrq_n_s8(int8x16_t a) { return vshrq_n_s8(a, 3); @@ -4752,7 +4752,7 @@ int8x16_t test_vshrq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHR_N]] int16x8_t test_vshrq_n_s16(int16x8_t a) { return vshrq_n_s16(a, 3); @@ -4761,7 +4761,7 @@ int16x8_t test_vshrq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <4 x i32> [[VSHR_N]] int32x4_t test_vshrq_n_s32(int32x4_t a) { return vshrq_n_s32(a, 3); @@ -4770,14 +4770,14 @@ int32x4_t test_vshrq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 3) // CHECK: ret <2 x i64> [[VSHR_N]] int64x2_t test_vshrq_n_s64(int64x2_t a) { return vshrq_n_s64(a, 3); } // CHECK-LABEL: @test_vshr_n_u8( -// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, splat (i8 3) // CHECK: ret <8 x i8> [[VSHR_N]] uint8x8_t test_vshr_n_u8(uint8x8_t a) { return vshr_n_u8(a, 3); @@ -4786,7 +4786,7 @@ uint8x8_t test_vshr_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <4 x i16> [[VSHR_N]] uint16x4_t test_vshr_n_u16(uint16x4_t a) { return vshr_n_u16(a, 3); @@ -4795,14 +4795,14 @@ uint16x4_t test_vshr_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <2 x i32> [[VSHR_N]] uint32x2_t test_vshr_n_u32(uint32x2_t a) { return vshr_n_u32(a, 3); } // CHECK-LABEL: @test_vshrq_n_u8( -// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, splat (i8 3) // CHECK: ret <16 x i8> [[VSHR_N]] uint8x16_t test_vshrq_n_u8(uint8x16_t a) { return vshrq_n_u8(a, 3); @@ -4811,7 +4811,7 @@ uint8x16_t test_vshrq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHR_N]] uint16x8_t test_vshrq_n_u16(uint16x8_t a) { return vshrq_n_u16(a, 3); @@ -4820,7 +4820,7 @@ uint16x8_t test_vshrq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 3) // CHECK: ret <4 x i32> [[VSHR_N]] uint32x4_t test_vshrq_n_u32(uint32x4_t a) { return vshrq_n_u32(a, 3); @@ -4829,14 +4829,14 @@ uint32x4_t test_vshrq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 3) // CHECK: ret <2 x i64> [[VSHR_N]] uint64x2_t test_vshrq_n_u64(uint64x2_t a) { return vshrq_n_u64(a, 3); } // CHECK-LABEL: @test_vsra_n_s8( -// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, splat (i8 3) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { @@ -4848,7 +4848,7 @@ int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 3) // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i16> [[TMP4]] int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { @@ -4860,7 +4860,7 @@ int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 3) // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i32> [[TMP4]] int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { @@ -4868,7 +4868,7 @@ int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vsraq_n_s8( -// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, splat (i8 3) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { @@ -4880,7 +4880,7 @@ int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 3) // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <8 x i16> [[TMP4]] int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { @@ -4892,7 +4892,7 @@ int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 3) // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i32> [[TMP4]] int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { @@ -4904,7 +4904,7 @@ int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 3) // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i64> [[TMP4]] int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { @@ -4912,7 +4912,7 @@ int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vsra_n_u8( -// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, splat (i8 3) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { @@ -4924,7 +4924,7 @@ uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 3) // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i16> [[TMP4]] uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { @@ -4936,7 +4936,7 @@ uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 3) // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i32> [[TMP4]] uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { @@ -4944,7 +4944,7 @@ uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vsraq_n_u8( -// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, splat (i8 3) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { @@ -4956,7 +4956,7 @@ uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 3) // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <8 x i16> [[TMP4]] uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { @@ -4968,7 +4968,7 @@ uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 3) // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i32> [[TMP4]] uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { @@ -4980,7 +4980,7 @@ uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 3) // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i64> [[TMP4]] uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { @@ -4988,7 +4988,7 @@ uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vrshr_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3)) // CHECK: ret <8 x i8> [[VRSHR_N]] int8x8_t test_vrshr_n_s8(int8x8_t a) { return vrshr_n_s8(a, 3); @@ -4997,7 +4997,7 @@ int8x8_t test_vrshr_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vrshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) // CHECK: ret <4 x i16> [[VRSHR_N1]] int16x4_t test_vrshr_n_s16(int16x4_t a) { return vrshr_n_s16(a, 3); @@ -5006,14 +5006,14 @@ int16x4_t test_vrshr_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vrshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) // CHECK: ret <2 x i32> [[VRSHR_N1]] int32x2_t test_vrshr_n_s32(int32x2_t a) { return vrshr_n_s32(a, 3); } // CHECK-LABEL: @test_vrshrq_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3)) // CHECK: ret <16 x i8> [[VRSHR_N]] int8x16_t test_vrshrq_n_s8(int8x16_t a) { return vrshrq_n_s8(a, 3); @@ -5022,7 +5022,7 @@ int8x16_t test_vrshrq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vrshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) // CHECK: ret <8 x i16> [[VRSHR_N1]] int16x8_t test_vrshrq_n_s16(int16x8_t a) { return vrshrq_n_s16(a, 3); @@ -5031,7 +5031,7 @@ int16x8_t test_vrshrq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vrshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) // CHECK: ret <4 x i32> [[VRSHR_N1]] int32x4_t test_vrshrq_n_s32(int32x4_t a) { return vrshrq_n_s32(a, 3); @@ -5040,14 +5040,14 @@ int32x4_t test_vrshrq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vrshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) // CHECK: ret <2 x i64> [[VRSHR_N1]] int64x2_t test_vrshrq_n_s64(int64x2_t a) { return vrshrq_n_s64(a, 3); } // CHECK-LABEL: @test_vrshr_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3)) // CHECK: ret <8 x i8> [[VRSHR_N]] uint8x8_t test_vrshr_n_u8(uint8x8_t a) { return vrshr_n_u8(a, 3); @@ -5056,7 +5056,7 @@ uint8x8_t test_vrshr_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vrshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) // CHECK: ret <4 x i16> [[VRSHR_N1]] uint16x4_t test_vrshr_n_u16(uint16x4_t a) { return vrshr_n_u16(a, 3); @@ -5065,14 +5065,14 @@ uint16x4_t test_vrshr_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vrshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) // CHECK: ret <2 x i32> [[VRSHR_N1]] uint32x2_t test_vrshr_n_u32(uint32x2_t a) { return vrshr_n_u32(a, 3); } // CHECK-LABEL: @test_vrshrq_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3)) // CHECK: ret <16 x i8> [[VRSHR_N]] uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { return vrshrq_n_u8(a, 3); @@ -5081,7 +5081,7 @@ uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vrshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) // CHECK: ret <8 x i16> [[VRSHR_N1]] uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { return vrshrq_n_u16(a, 3); @@ -5090,7 +5090,7 @@ uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vrshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) // CHECK: ret <4 x i32> [[VRSHR_N1]] uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { return vrshrq_n_u32(a, 3); @@ -5099,14 +5099,14 @@ uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vrshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) // CHECK: ret <2 x i64> [[VRSHR_N1]] uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { return vrshrq_n_u64(a, 3); } // CHECK-LABEL: @test_vrsra_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3)) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { @@ -5117,7 +5117,7 @@ int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i16> [[TMP3]] @@ -5129,7 +5129,7 @@ int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i32> [[TMP3]] @@ -5138,7 +5138,7 @@ int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vrsraq_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3)) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { @@ -5149,7 +5149,7 @@ int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <8 x i16> [[TMP3]] @@ -5161,7 +5161,7 @@ int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i32> [[TMP3]] @@ -5173,7 +5173,7 @@ int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i64> [[TMP3]] @@ -5182,7 +5182,7 @@ int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vrsra_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3)) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { @@ -5193,7 +5193,7 @@ uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i16> [[TMP3]] @@ -5205,7 +5205,7 @@ uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i32> [[TMP3]] @@ -5214,7 +5214,7 @@ uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vrsraq_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3)) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { @@ -5225,7 +5225,7 @@ uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <8 x i16> [[TMP3]] @@ -5237,7 +5237,7 @@ uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i32> [[TMP3]] @@ -5249,7 +5249,7 @@ uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i64> [[TMP3]] @@ -5606,7 +5606,7 @@ poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { } // CHECK-LABEL: @test_vqshlu_n_s8( -// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 3)) // CHECK: ret <8 x i8> [[VQSHLU_N]] uint8x8_t test_vqshlu_n_s8(int8x8_t a) { return vqshlu_n_s8(a, 3); @@ -5615,7 +5615,7 @@ uint8x8_t test_vqshlu_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vqshlu_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 3)) // CHECK: ret <4 x i16> [[VQSHLU_N1]] uint16x4_t test_vqshlu_n_s16(int16x4_t a) { return vqshlu_n_s16(a, 3); @@ -5624,14 +5624,14 @@ uint16x4_t test_vqshlu_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vqshlu_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 3)) // CHECK: ret <2 x i32> [[VQSHLU_N1]] uint32x2_t test_vqshlu_n_s32(int32x2_t a) { return vqshlu_n_s32(a, 3); } // CHECK-LABEL: @test_vqshluq_n_s8( -// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 3)) // CHECK: ret <16 x i8> [[VQSHLU_N]] uint8x16_t test_vqshluq_n_s8(int8x16_t a) { return vqshluq_n_s8(a, 3); @@ -5640,7 +5640,7 @@ uint8x16_t test_vqshluq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vqshluq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 3)) // CHECK: ret <8 x i16> [[VQSHLU_N1]] uint16x8_t test_vqshluq_n_s16(int16x8_t a) { return vqshluq_n_s16(a, 3); @@ -5649,7 +5649,7 @@ uint16x8_t test_vqshluq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshluq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 3)) // CHECK: ret <4 x i32> [[VQSHLU_N1]] uint32x4_t test_vqshluq_n_s32(int32x4_t a) { return vqshluq_n_s32(a, 3); @@ -5658,7 +5658,7 @@ uint32x4_t test_vqshluq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshluq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 3)) // CHECK: ret <2 x i64> [[VQSHLU_N1]] uint64x2_t test_vqshluq_n_s64(int64x2_t a) { return vqshluq_n_s64(a, 3); @@ -5667,7 +5667,7 @@ uint64x2_t test_vqshluq_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: ret <8 x i8> [[VSHRN_N]] int8x8_t test_vshrn_n_s16(int16x8_t a) { @@ -5677,7 +5677,7 @@ int8x8_t test_vshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: ret <4 x i16> [[VSHRN_N]] int16x4_t test_vshrn_n_s32(int32x4_t a) { @@ -5687,7 +5687,7 @@ int16x4_t test_vshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: ret <2 x i32> [[VSHRN_N]] int32x2_t test_vshrn_n_s64(int64x2_t a) { @@ -5697,7 +5697,7 @@ int32x2_t test_vshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: ret <8 x i8> [[VSHRN_N]] uint8x8_t test_vshrn_n_u16(uint16x8_t a) { @@ -5707,7 +5707,7 @@ uint8x8_t test_vshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: ret <4 x i16> [[VSHRN_N]] uint16x4_t test_vshrn_n_u32(uint32x4_t a) { @@ -5717,7 +5717,7 @@ uint16x4_t test_vshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: ret <2 x i32> [[VSHRN_N]] uint32x2_t test_vshrn_n_u64(uint64x2_t a) { @@ -5727,7 +5727,7 @@ uint32x2_t test_vshrn_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vshrn_high_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] @@ -5738,7 +5738,7 @@ int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vshrn_high_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] @@ -5749,7 +5749,7 @@ int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vshrn_high_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] @@ -5760,7 +5760,7 @@ int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vshrn_high_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] @@ -5771,7 +5771,7 @@ uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vshrn_high_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] @@ -5782,7 +5782,7 @@ uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vshrn_high_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] @@ -6248,7 +6248,7 @@ uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vshll_n_s8( // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_n_s8(int8x8_t a) { return vshll_n_s8(a, 3); @@ -6258,7 +6258,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_n_s16(int16x4_t a) { return vshll_n_s16(a, 9); @@ -6268,7 +6268,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_n_s32(int32x2_t a) { return vshll_n_s32(a, 19); @@ -6276,7 +6276,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshll_n_u8( // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_n_u8(uint8x8_t a) { return vshll_n_u8(a, 3); @@ -6286,7 +6286,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_n_u16(uint16x4_t a) { return vshll_n_u16(a, 9); @@ -6296,7 +6296,7 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_n_u32(uint32x2_t a) { return vshll_n_u32(a, 19); @@ -6305,7 +6305,7 @@ uint64x2_t test_vshll_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vshll_high_n_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_high_n_s8(int8x16_t a) { return vshll_high_n_s8(a, 3); @@ -6316,7 +6316,7 @@ int16x8_t test_vshll_high_n_s8(int8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_high_n_s16(int16x8_t a) { return vshll_high_n_s16(a, 9); @@ -6327,7 +6327,7 @@ int32x4_t test_vshll_high_n_s16(int16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_high_n_s32(int32x4_t a) { return vshll_high_n_s32(a, 19); @@ -6336,7 +6336,7 @@ int64x2_t test_vshll_high_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshll_high_n_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { return vshll_high_n_u8(a, 3); @@ -6347,7 +6347,7 @@ uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { return vshll_high_n_u16(a, 9); @@ -6358,7 +6358,7 @@ uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { return vshll_high_n_u32(a, 19); @@ -7066,7 +7066,7 @@ uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { @@ -7077,7 +7077,7 @@ int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { @@ -7088,7 +7088,7 @@ int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { @@ -7099,7 +7099,7 @@ int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { @@ -7110,7 +7110,7 @@ uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { @@ -7121,7 +7121,7 @@ uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { @@ -7132,7 +7132,7 @@ uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] @@ -7144,7 +7144,7 @@ int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7156,7 +7156,7 @@ int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7168,7 +7168,7 @@ int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] @@ -7180,7 +7180,7 @@ uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7192,7 +7192,7 @@ uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], +// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32) // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7326,7 +7326,7 @@ uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { @@ -7337,7 +7337,7 @@ int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { @@ -7348,7 +7348,7 @@ int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { @@ -7359,7 +7359,7 @@ int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { @@ -7370,7 +7370,7 @@ uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { @@ -7381,7 +7381,7 @@ uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { @@ -7392,7 +7392,7 @@ uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] @@ -7404,7 +7404,7 @@ int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7416,7 +7416,7 @@ int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -7428,7 +7428,7 @@ int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] @@ -7440,7 +7440,7 @@ uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] @@ -7452,7 +7452,7 @@ uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], +// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32) // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] @@ -13499,7 +13499,7 @@ int64_t test_vshrd_n_s64(int64_t a) { // CHECK-LABEL: @test_vshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHR_N]] int64x1_t test_vshr_n_s64(int64x1_t a) { return vshr_n_s64(a, 1); @@ -13521,7 +13521,7 @@ uint64_t test_vshrd_n_u64_2() { // CHECK-LABEL: @test_vshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHR_N]] uint64x1_t test_vshr_n_u64(uint64x1_t a) { return vshr_n_u64(a, 1); @@ -13537,7 +13537,7 @@ int64_t test_vrshrd_n_s64(int64_t a) { // CHECK-LABEL: @test_vrshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VRSHR_N1]] int64x1_t test_vrshr_n_s64(int64x1_t a) { return vrshr_n_s64(a, 1); @@ -13553,7 +13553,7 @@ uint64_t test_vrshrd_n_u64(uint64_t a) { // CHECK-LABEL: @test_vrshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VRSHR_N1]] uint64x1_t test_vrshr_n_u64(uint64x1_t a) { return vrshr_n_u64(a, 1); @@ -13572,7 +13572,7 @@ int64_t test_vsrad_n_s64(int64_t a, int64_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <1 x i64> [[TMP4]] int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { @@ -13598,7 +13598,7 @@ uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <1 x i64> [[TMP4]] uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { @@ -13617,7 +13617,7 @@ int64_t test_vrsrad_n_s64(int64_t a, int64_t b) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <1 x i64> [[TMP3]] @@ -13637,7 +13637,7 @@ uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <1 x i64> [[TMP3]] @@ -13655,7 +13655,7 @@ int64_t test_vshld_n_s64(int64_t a) { // CHECK-LABEL: @test_vshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHL_N]] int64x1_t test_vshl_n_s64(int64x1_t a) { return vshl_n_s64(a, 1); @@ -13671,7 +13671,7 @@ uint64_t test_vshld_n_u64(uint64_t a) { // CHECK-LABEL: @test_vshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHL_N]] uint64x1_t test_vshl_n_u64(uint64x1_t a) { return vshl_n_u64(a, 1); @@ -13830,7 +13830,7 @@ uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHL_N1]] int64x1_t test_vqshl_n_s64(int64x1_t a) { return vqshl_n_s64(a, 1); @@ -13871,7 +13871,7 @@ uint64_t test_vqshld_n_u64(uint64_t a) { // CHECK-LABEL: @test_vqshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHL_N1]] uint64x1_t test_vqshl_n_u64(uint64x1_t a) { return vqshl_n_u64(a, 1); @@ -13912,7 +13912,7 @@ int64_t test_vqshlud_n_s64(int64_t a) { // CHECK-LABEL: @test_vqshlu_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHLU_N1]] uint64x1_t test_vqshlu_n_s64(int64x1_t a) { return vqshlu_n_s64(a, 1); diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c index 759a8d5cb2d8d4..165f33a9f399fe 100644 --- a/clang/test/CodeGen/aarch64-neon-misc.c +++ b/clang/test/CodeGen/aarch64-neon-misc.c @@ -1668,98 +1668,98 @@ poly8x16_t test_vcntq_p8(poly8x16_t a) { } // CHECK-LABEL: @test_vmvn_s8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] int8x8_t test_vmvn_s8(int8x8_t a) { return vmvn_s8(a); } // CHECK-LABEL: @test_vmvnq_s8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] int8x16_t test_vmvnq_s8(int8x16_t a) { return vmvnq_s8(a); } // CHECK-LABEL: @test_vmvn_s16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1) // CHECK: ret <4 x i16> [[NEG_I]] int16x4_t test_vmvn_s16(int16x4_t a) { return vmvn_s16(a); } // CHECK-LABEL: @test_vmvnq_s16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1) // CHECK: ret <8 x i16> [[NEG_I]] int16x8_t test_vmvnq_s16(int16x8_t a) { return vmvnq_s16(a); } // CHECK-LABEL: @test_vmvn_s32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1) // CHECK: ret <2 x i32> [[NEG_I]] int32x2_t test_vmvn_s32(int32x2_t a) { return vmvn_s32(a); } // CHECK-LABEL: @test_vmvnq_s32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1) // CHECK: ret <4 x i32> [[NEG_I]] int32x4_t test_vmvnq_s32(int32x4_t a) { return vmvnq_s32(a); } // CHECK-LABEL: @test_vmvn_u8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] uint8x8_t test_vmvn_u8(uint8x8_t a) { return vmvn_u8(a); } // CHECK-LABEL: @test_vmvnq_u8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] uint8x16_t test_vmvnq_u8(uint8x16_t a) { return vmvnq_u8(a); } // CHECK-LABEL: @test_vmvn_u16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1) // CHECK: ret <4 x i16> [[NEG_I]] uint16x4_t test_vmvn_u16(uint16x4_t a) { return vmvn_u16(a); } // CHECK-LABEL: @test_vmvnq_u16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1) // CHECK: ret <8 x i16> [[NEG_I]] uint16x8_t test_vmvnq_u16(uint16x8_t a) { return vmvnq_u16(a); } // CHECK-LABEL: @test_vmvn_u32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1) // CHECK: ret <2 x i32> [[NEG_I]] uint32x2_t test_vmvn_u32(uint32x2_t a) { return vmvn_u32(a); } // CHECK-LABEL: @test_vmvnq_u32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1) // CHECK: ret <4 x i32> [[NEG_I]] uint32x4_t test_vmvnq_u32(uint32x4_t a) { return vmvnq_u32(a); } // CHECK-LABEL: @test_vmvn_p8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] poly8x8_t test_vmvn_p8(poly8x8_t a) { return vmvn_p8(a); } // CHECK-LABEL: @test_vmvnq_p8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] poly8x16_t test_vmvnq_p8(poly8x16_t a) { return vmvnq_p8(a); @@ -2076,7 +2076,7 @@ uint32x4_t test_vqmovn_high_u64(uint32x2_t a, uint64x2_t b) { // CHECK-LABEL: @test_vshll_n_s8( // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_n_s8(int8x8_t a) { return vshll_n_s8(a, 8); @@ -2086,7 +2086,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_n_s16(int16x4_t a) { return vshll_n_s16(a, 16); @@ -2096,7 +2096,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_n_s32(int32x2_t a) { return vshll_n_s32(a, 32); @@ -2104,7 +2104,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshll_n_u8( // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_n_u8(uint8x8_t a) { return vshll_n_u8(a, 8); @@ -2114,7 +2114,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_n_u16(uint16x4_t a) { return vshll_n_u16(a, 16); @@ -2124,7 +2124,7 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_n_u32(uint32x2_t a) { return vshll_n_u32(a, 32); @@ -2133,7 +2133,7 @@ uint64x2_t test_vshll_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vshll_high_n_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_high_n_s8(int8x16_t a) { return vshll_high_n_s8(a, 8); @@ -2144,7 +2144,7 @@ int16x8_t test_vshll_high_n_s8(int8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_high_n_s16(int16x8_t a) { return vshll_high_n_s16(a, 16); @@ -2155,7 +2155,7 @@ int32x4_t test_vshll_high_n_s16(int16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_high_n_s32(int32x4_t a) { return vshll_high_n_s32(a, 32); @@ -2164,7 +2164,7 @@ int64x2_t test_vshll_high_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshll_high_n_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { return vshll_high_n_u8(a, 8); @@ -2175,7 +2175,7 @@ uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { return vshll_high_n_u16(a, 16); @@ -2186,7 +2186,7 @@ uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { return vshll_high_n_u32(a, 32); diff --git a/clang/test/CodeGen/aarch64-neon-shifts.c b/clang/test/CodeGen/aarch64-neon-shifts.c index cf1bbef7f8ad7d..eb169f5290d262 100644 --- a/clang/test/CodeGen/aarch64-neon-shifts.c +++ b/clang/test/CodeGen/aarch64-neon-shifts.c @@ -7,13 +7,13 @@ uint8x8_t test_shift_vshr(uint8x8_t a) { // CHECK-LABEL: test_shift_vshr - // CHECK: %{{.*}} = lshr <8 x i8> %a, + // CHECK: %{{.*}} = lshr <8 x i8> %a, splat (i8 5) return vshr_n_u8(a, 5); } int8x8_t test_shift_vshr_smax(int8x8_t a) { // CHECK-LABEL: test_shift_vshr_smax - // CHECK: %{{.*}} = ashr <8 x i8> %a, + // CHECK: %{{.*}} = ashr <8 x i8> %a, splat (i8 7) return vshr_n_s8(a, 8); } @@ -25,14 +25,14 @@ uint8x8_t test_shift_vshr_umax(uint8x8_t a) { uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: test_shift_vsra - // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, + // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, splat (i8 5) // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]] return vsra_n_u8(a, b, 5); } int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) { // CHECK-LABEL: test_shift_vsra_smax - // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, + // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, splat (i8 7) // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]] return vsra_n_s8(a, b, 8); } diff --git a/clang/test/CodeGen/aarch64-neon-tbl.c b/clang/test/CodeGen/aarch64-neon-tbl.c index 20f3cd8d88919b..d51f0ae2e11c5f 100644 --- a/clang/test/CodeGen/aarch64-neon-tbl.c +++ b/clang/test/CodeGen/aarch64-neon-tbl.c @@ -245,10 +245,10 @@ int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8) // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1) // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -296,10 +296,10 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24) // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]] -// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], +// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1) // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -725,10 +725,10 @@ uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8) // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1) // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -776,10 +776,10 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24) // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]] -// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], +// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1) // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -1205,10 +1205,10 @@ poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8) // CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> // CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1) // CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] @@ -1256,10 +1256,10 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { // CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], +// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24) // CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8> // CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]] -// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], +// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1) // CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]] // CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]] // CHECK-NEXT: ret <8 x i8> [[VTBX_I]] diff --git a/clang/test/CodeGen/aarch64-poly64.c b/clang/test/CodeGen/aarch64-poly64.c index 6e8f85557913e0..f3c057ecf48c17 100644 --- a/clang/test/CodeGen/aarch64-poly64.c +++ b/clang/test/CodeGen/aarch64-poly64.c @@ -42,7 +42,7 @@ uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) { // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbsl_p64(<1 x i64> noundef %a, <1 x i64> noundef %b, <1 x i64> noundef %c) #0 { // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %a, %b -// CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, +// CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <1 x i64> [[VBSL5_I]] @@ -52,7 +52,7 @@ poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) { // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbslq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) #0 { // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b -// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, +// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, splat (i64 -1) // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <2 x i64> [[VBSL5_I]] diff --git a/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c b/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c index 74b543b67bfba4..017c71aa41143b 100644 --- a/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c @@ -370,7 +370,7 @@ fixed_uint64_t xor_u64(fixed_uint64_t a, fixed_uint64_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_COERCE:%.*]] = bitcast [[TMP0:%.*]] to // CHECK-NEXT: [[A:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8( [[A_COERCE]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i8.v8i8( undef, <8 x i8> [[NOT]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to // CHECK-NEXT: ret [[TMP1]] @@ -382,7 +382,7 @@ fixed_bool_t neg_bool(fixed_bool_t a) { // CHECK-LABEL: @neg_i8( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <64 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv16i8.v64i8( undef, <64 x i8> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -393,7 +393,7 @@ fixed_int8_t neg_i8(fixed_int8_t a) { // CHECK-LABEL: @neg_i16( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i16> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i16.v32i16( undef, <32 x i16> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -404,7 +404,7 @@ fixed_int16_t neg_i16(fixed_int16_t a) { // CHECK-LABEL: @neg_i32( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i32> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv4i32.v16i32( undef, <16 x i32> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -415,7 +415,7 @@ fixed_int32_t neg_i32(fixed_int32_t a) { // CHECK-LABEL: @neg_i64( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i64> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i64.v8i64( undef, <8 x i64> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -426,7 +426,7 @@ fixed_int64_t neg_i64(fixed_int64_t a) { // CHECK-LABEL: @neg_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <64 x i8> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv16i8.v64i8( undef, <64 x i8> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -437,7 +437,7 @@ fixed_uint8_t neg_u8(fixed_uint8_t a) { // CHECK-LABEL: @neg_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i16> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv8i16.v32i16( undef, <32 x i16> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -448,7 +448,7 @@ fixed_uint16_t neg_u16(fixed_uint16_t a) { // CHECK-LABEL: @neg_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i32> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv4i32.v16i32( undef, <16 x i32> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -459,7 +459,7 @@ fixed_uint32_t neg_u32(fixed_uint32_t a) { // CHECK-LABEL: @neg_u64( // CHECK-NEXT: entry: // CHECK-NEXT: [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64( [[A_COERCE:%.*]], i64 0) -// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i64> [[A]], +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1) // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i64.v8i64( undef, <8 x i64> [[NOT]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c index 75fa4a2d8033a1..4d2ef318005bd3 100644 --- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c @@ -21,7 +21,7 @@ // CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> // CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> // CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] // CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half> @@ -40,7 +40,7 @@ float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { // CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> // CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> // CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], splat (i16 -1) // CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] // CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half> diff --git a/clang/test/CodeGen/arm-bf16-convert-intrinsics.c b/clang/test/CodeGen/arm-bf16-convert-intrinsics.c index 9477ebdb8285af..e2be98c086853e 100644 --- a/clang/test/CodeGen/arm-bf16-convert-intrinsics.c +++ b/clang/test/CodeGen/arm-bf16-convert-intrinsics.c @@ -30,7 +30,7 @@ // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A64-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A64-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 16 // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 16 // CHECK-A64-NEXT: ret <4 x float> [[TMP3]] @@ -43,7 +43,7 @@ // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 8 // CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]] @@ -65,7 +65,7 @@ // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], +// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 16) // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 8 // CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP4]] @@ -83,7 +83,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) { // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 16 // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 16 // CHECK-A64-NEXT: ret <4 x float> [[TMP3]] @@ -97,7 +97,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) { // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]] @@ -138,7 +138,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) { // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> -// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], +// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16) // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP8]] @@ -156,7 +156,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) { // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 16 // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 16 // CHECK-A64-NEXT: ret <4 x float> [[TMP3]] @@ -170,7 +170,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) { // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> -// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16) // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]] @@ -211,7 +211,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) { // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> -// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], +// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16) // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8 // CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP8]] diff --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c index bf56c0685bb6e4..8bb97f57e540d0 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c @@ -84,7 +84,7 @@ int32x4_t test_vabsq_s32(int32x4_t a) // CHECK-LABEL: @test_vmvnq_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], splat (i8 -1) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vmvnq_s8(int8x16_t a) @@ -98,7 +98,7 @@ int8x16_t test_vmvnq_s8(int8x16_t a) // CHECK-LABEL: @test_vmvnq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], splat (i16 -1) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vmvnq_s16(int16x8_t a) @@ -112,7 +112,7 @@ int16x8_t test_vmvnq_s16(int16x8_t a) // CHECK-LABEL: @test_vmvnq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vmvnq_s32(int32x4_t a) @@ -126,7 +126,7 @@ int32x4_t test_vmvnq_s32(int32x4_t a) // CHECK-LABEL: @test_vmvnq_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], splat (i8 -1) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vmvnq_u8(uint8x16_t a) @@ -140,7 +140,7 @@ uint8x16_t test_vmvnq_u8(uint8x16_t a) // CHECK-LABEL: @test_vmvnq_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], splat (i16 -1) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vmvnq_u16(uint16x8_t a) @@ -154,7 +154,7 @@ uint16x8_t test_vmvnq_u16(uint16x8_t a) // CHECK-LABEL: @test_vmvnq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vmvnq_u32(uint32x4_t a) @@ -431,9 +431,9 @@ int32x4_t test_vnegq_s32(int32x4_t a) // CHECK-LABEL: @test_vqabsq_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <16 x i8> [[A:%.*]], zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[A]], +// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[A]], splat (i8 -128) // CHECK-NEXT: [[TMP2:%.*]] = sub <16 x i8> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> , <16 x i8> [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> splat (i8 127), <16 x i8> [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[TMP3]] // CHECK-NEXT: ret <16 x i8> [[TMP4]] // @@ -449,9 +449,9 @@ int8x16_t test_vqabsq_s8(int8x16_t a) // CHECK-LABEL: @test_vqabsq_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A]], +// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[A]], splat (i16 -32768) // CHECK-NEXT: [[TMP2:%.*]] = sub <8 x i16> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 32767), <8 x i16> [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[TMP3]] // CHECK-NEXT: ret <8 x i16> [[TMP4]] // @@ -467,9 +467,9 @@ int16x8_t test_vqabsq_s16(int16x8_t a) // CHECK-LABEL: @test_vqabsq_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[A]], +// CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[A]], splat (i32 -2147483648) // CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 2147483647), <4 x i32> [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[TMP3]] // CHECK-NEXT: ret <4 x i32> [[TMP4]] // @@ -484,9 +484,9 @@ int32x4_t test_vqabsq_s32(int32x4_t a) // CHECK-LABEL: @test_vqnegq_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], splat (i8 -128) // CHECK-NEXT: [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> , <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> splat (i8 127), <16 x i8> [[TMP1]] // CHECK-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vqnegq_s8(int8x16_t a) @@ -500,9 +500,9 @@ int8x16_t test_vqnegq_s8(int8x16_t a) // CHECK-LABEL: @test_vqnegq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], splat (i16 -32768) // CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> , <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> splat (i16 32767), <8 x i16> [[TMP1]] // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vqnegq_s16(int16x8_t a) @@ -516,9 +516,9 @@ int16x8_t test_vqnegq_s16(int16x8_t a) // CHECK-LABEL: @test_vqnegq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], splat (i32 -2147483648) // CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[A]] -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> , <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 2147483647), <4 x i32> [[TMP1]] // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vqnegq_s32(int32x4_t a) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c index b038322bae5b23..10fe02bd421d11 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c @@ -8,7 +8,7 @@ // CHECK-LABEL: @test_vbicq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 11007) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vbicq_n_s16(int16x8_t a) @@ -22,7 +22,7 @@ int16x8_t test_vbicq_n_s16(int16x8_t a) // CHECK-LABEL: @test_vbicq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -252) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vbicq_n_s32(int32x4_t a) @@ -36,7 +36,7 @@ int32x4_t test_vbicq_n_s32(int32x4_t a) // CHECK-LABEL: @test_vbicq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 -243) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vbicq_n_u16(uint16x8_t a) @@ -50,7 +50,7 @@ uint16x8_t test_vbicq_n_u16(uint16x8_t a) // CHECK-LABEL: @test_vbicq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -8193) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vbicq_n_u32(uint32x4_t a) @@ -64,7 +64,7 @@ uint32x4_t test_vbicq_n_u32(uint32x4_t a) // CHECK-LABEL: @test_vorrq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 195) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vorrq_n_s16(int16x8_t a) @@ -78,7 +78,7 @@ int16x8_t test_vorrq_n_s16(int16x8_t a) // CHECK-LABEL: @test_vorrq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 65536) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vorrq_n_s32(int32x4_t a) @@ -92,7 +92,7 @@ int32x4_t test_vorrq_n_s32(int32x4_t a) // CHECK-LABEL: @test_vorrq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 -4096) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vorrq_n_u16(uint16x8_t a) @@ -106,7 +106,7 @@ uint16x8_t test_vorrq_n_u16(uint16x8_t a) // CHECK-LABEL: @test_vorrq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 8978432) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vorrq_n_u32(uint32x4_t a) @@ -120,7 +120,7 @@ uint32x4_t test_vorrq_n_u32(uint32x4_t a) // CHECK-LABEL: @test_vmvnq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <8 x i16> +// CHECK-NEXT: ret <8 x i16> splat (i16 27391) // int16x8_t test_vmvnq_n_s16() { @@ -129,7 +129,7 @@ int16x8_t test_vmvnq_n_s16() // CHECK-LABEL: @test_vmvnq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 -5570561) // int32x4_t test_vmvnq_n_s32() { @@ -138,7 +138,7 @@ int32x4_t test_vmvnq_n_s32() // CHECK-LABEL: @test_vmvnq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <8 x i16> +// CHECK-NEXT: ret <8 x i16> splat (i16 -18689) // uint16x8_t test_vmvnq_n_u16() { @@ -147,7 +147,7 @@ uint16x8_t test_vmvnq_n_u16() // CHECK-LABEL: @test_vmvnq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 1023410175) // uint32x4_t test_vmvnq_n_u32() { @@ -158,7 +158,7 @@ uint32x4_t test_vmvnq_n_u32() // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 -11265) // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]] // CHECK-NEXT: ret <8 x i16> [[TMP3]] // @@ -175,7 +175,7 @@ int16x8_t test_vbicq_m_n_s16(int16x8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -13893633) // CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]] // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -192,7 +192,7 @@ int32x4_t test_vbicq_m_n_s32(int32x4_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 -37) // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]] // CHECK-NEXT: ret <8 x i16> [[TMP3]] // @@ -209,7 +209,7 @@ uint16x8_t test_vbicq_m_n_u16(uint16x8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -1644167169) // CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]] // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -226,7 +226,7 @@ uint32x4_t test_vbicq_m_n_u32(uint32x4_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 13568) // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]] // CHECK-NEXT: ret <8 x i16> [[TMP3]] // @@ -243,7 +243,7 @@ int16x8_t test_vorrq_m_n_s16(int16x8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 654311424) // CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]] // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -260,7 +260,7 @@ int32x4_t test_vorrq_m_n_s32(int32x4_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 175) // CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]] // CHECK-NEXT: ret <8 x i16> [[TMP3]] // @@ -277,7 +277,7 @@ uint16x8_t test_vorrq_m_n_u16(uint16x8_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 89) // CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]] // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -294,7 +294,7 @@ uint32x4_t test_vorrq_m_n_u32(uint32x4_t a, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> [[INACTIVE:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 -3841), <8 x i16> [[INACTIVE:%.*]] // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p) @@ -310,7 +310,7 @@ int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[INACTIVE:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -18945), <4 x i32> [[INACTIVE:%.*]] // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p) @@ -326,7 +326,7 @@ int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> [[INACTIVE:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 23295), <8 x i16> [[INACTIVE:%.*]] // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p) @@ -342,7 +342,7 @@ uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[INACTIVE:%.*]] +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -63489), <4 x i32> [[INACTIVE:%.*]] // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p) @@ -358,7 +358,7 @@ uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> undef +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 767), <8 x i16> undef // CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p) @@ -370,7 +370,7 @@ int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> undef +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -12189697), <4 x i32> undef // CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p) @@ -382,7 +382,7 @@ int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> , <8 x i16> undef +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 -21505), <8 x i16> undef // CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p) @@ -394,7 +394,7 @@ uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> undef +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -4865), <4 x i32> undef // CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vmvnq_x_n_u32(mve_pred16_t p) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp index 35af174e1f6f57..29719614d04fbc 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp +++ b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp @@ -8,7 +8,7 @@ // CHECK-LABEL: @_Z16test_vbicq_n_s1617__simd128_int16_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 11007) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vbicq_n_s16(int16x8_t a) @@ -22,7 +22,7 @@ int16x8_t test_vbicq_n_s16(int16x8_t a) // CHECK-LABEL: @_Z16test_vbicq_n_u3218__simd128_uint32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -8193) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vbicq_n_u32(uint32x4_t a) @@ -36,7 +36,7 @@ uint32x4_t test_vbicq_n_u32(uint32x4_t a) // CHECK-LABEL: @_Z16test_vorrq_n_s3217__simd128_int32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 65536) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vorrq_n_s32(int32x4_t a) @@ -50,7 +50,7 @@ int32x4_t test_vorrq_n_s32(int32x4_t a) // CHECK-LABEL: @_Z16test_vorrq_n_u1618__simd128_uint16_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 -4096) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vorrq_n_u16(uint16x8_t a) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c b/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c index dc70647a9c94d9..fbc63983b73b32 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c @@ -8,7 +8,7 @@ // CHECK-LABEL: @test_vbicq_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], splat (i8 -1) // CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // @@ -23,7 +23,7 @@ uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) // CHECK-LABEL: @test_vbicq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], splat (i16 -1) // CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // @@ -38,7 +38,7 @@ int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) // CHECK-LABEL: @test_vbicq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1) // CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -55,7 +55,7 @@ uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -1) // CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP0]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float> // CHECK-NEXT: ret <4 x float> [[TMP4]] diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c index 72a03ed8def3ec..1f5faf41bfd901 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c @@ -8,7 +8,7 @@ // CHECK-LABEL: @test_vshlq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], splat (i8 5) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vshlq_n_s8(int8x16_t a) @@ -22,7 +22,7 @@ int8x16_t test_vshlq_n_s8(int8x16_t a) // CHECK-LABEL: @test_vshlq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], splat (i16 5) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vshlq_n_s16(int16x8_t a) @@ -36,7 +36,7 @@ int16x8_t test_vshlq_n_s16(int16x8_t a) // CHECK-LABEL: @test_vshlq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], splat (i32 18) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vshlq_n_s32(int32x4_t a) @@ -92,7 +92,7 @@ int32x4_t test_vshlq_n_s32_trivial(int32x4_t a) // CHECK-LABEL: @test_vshlq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], splat (i8 3) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vshlq_n_u8(uint8x16_t a) @@ -106,7 +106,7 @@ uint8x16_t test_vshlq_n_u8(uint8x16_t a) // CHECK-LABEL: @test_vshlq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], splat (i16 11) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vshlq_n_u16(uint16x8_t a) @@ -120,7 +120,7 @@ uint16x8_t test_vshlq_n_u16(uint16x8_t a) // CHECK-LABEL: @test_vshlq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], splat (i32 7) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vshlq_n_u32(uint32x4_t a) @@ -176,7 +176,7 @@ uint32x4_t test_vshlq_n_u32_trivial(uint32x4_t a) // CHECK-LABEL: @test_vshrq_n_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], splat (i8 4) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vshrq_n_s8(int8x16_t a) @@ -190,7 +190,7 @@ int8x16_t test_vshrq_n_s8(int8x16_t a) // CHECK-LABEL: @test_vshrq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], splat (i16 10) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vshrq_n_s16(int16x8_t a) @@ -204,7 +204,7 @@ int16x8_t test_vshrq_n_s16(int16x8_t a) // CHECK-LABEL: @test_vshrq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], splat (i32 19) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vshrq_n_s32(int32x4_t a) @@ -218,7 +218,7 @@ int32x4_t test_vshrq_n_s32(int32x4_t a) // CHECK-LABEL: @test_vshrq_n_s8_trivial( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], splat (i8 7) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vshrq_n_s8_trivial(int8x16_t a) @@ -232,7 +232,7 @@ int8x16_t test_vshrq_n_s8_trivial(int8x16_t a) // CHECK-LABEL: @test_vshrq_n_s16_trivial( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], splat (i16 15) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vshrq_n_s16_trivial(int16x8_t a) @@ -246,7 +246,7 @@ int16x8_t test_vshrq_n_s16_trivial(int16x8_t a) // CHECK-LABEL: @test_vshrq_n_s32_trivial( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], splat (i32 31) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vshrq_n_s32_trivial(int32x4_t a) @@ -260,7 +260,7 @@ int32x4_t test_vshrq_n_s32_trivial(int32x4_t a) // CHECK-LABEL: @test_vshrq_n_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], splat (i8 1) // CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vshrq_n_u8(uint8x16_t a) @@ -274,7 +274,7 @@ uint8x16_t test_vshrq_n_u8(uint8x16_t a) // CHECK-LABEL: @test_vshrq_n_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], splat (i16 10) // CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vshrq_n_u16(uint16x8_t a) @@ -288,7 +288,7 @@ uint16x8_t test_vshrq_n_u16(uint16x8_t a) // CHECK-LABEL: @test_vshrq_n_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], splat (i32 10) // CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vshrq_n_u32(uint32x4_t a) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vornq.c b/clang/test/CodeGen/arm-mve-intrinsics/vornq.c index f8db91cad6b891..60a0d8835985cc 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vornq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vornq.c @@ -8,7 +8,7 @@ // CHECK-LABEL: @test_vornq_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], splat (i8 -1) // CHECK-NEXT: [[TMP1:%.*]] = or <16 x i8> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <16 x i8> [[TMP1]] // @@ -23,7 +23,7 @@ uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) // CHECK-LABEL: @test_vornq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], splat (i16 -1) // CHECK-NEXT: [[TMP1:%.*]] = or <8 x i16> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <8 x i16> [[TMP1]] // @@ -38,7 +38,7 @@ int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) // CHECK-LABEL: @test_vornq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1) // CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[A:%.*]], [[TMP0]] // CHECK-NEXT: ret <4 x i32> [[TMP1]] // @@ -55,7 +55,7 @@ uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -1) // CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP0]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float> // CHECK-NEXT: ret <4 x float> [[TMP4]] diff --git a/clang/test/CodeGen/arm-neon-shifts.c b/clang/test/CodeGen/arm-neon-shifts.c index c7f5d2611c448c..129d385abc56d3 100644 --- a/clang/test/CodeGen/arm-neon-shifts.c +++ b/clang/test/CodeGen/arm-neon-shifts.c @@ -9,13 +9,13 @@ uint8x8_t test_shift_vshr(uint8x8_t a) { // CHECK-LABEL: test_shift_vshr - // CHECK: %{{.*}} = lshr <8 x i8> %a, + // CHECK: %{{.*}} = lshr <8 x i8> %a, splat (i8 5) return vshr_n_u8(a, 5); } int8x8_t test_shift_vshr_smax(int8x8_t a) { // CHECK-LABEL: test_shift_vshr_smax - // CHECK: %{{.*}} = ashr <8 x i8> %a, + // CHECK: %{{.*}} = ashr <8 x i8> %a, splat (i8 7) return vshr_n_s8(a, 8); } @@ -27,14 +27,14 @@ uint8x8_t test_shift_vshr_umax(uint8x8_t a) { uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: test_shift_vsra - // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, + // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, splat (i8 5) // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]] return vsra_n_u8(a, b, 5); } int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) { // CHECK-LABEL: test_shift_vsra_smax - // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, + // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, splat (i8 7) // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]] return vsra_n_s8(a, b, 8); } diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c index 93747bc15c6add..9f43dd2be5af58 100644 --- a/clang/test/CodeGen/arm_neon_intrinsics.c +++ b/clang/test/CodeGen/arm_neon_intrinsics.c @@ -583,7 +583,7 @@ uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { @@ -594,7 +594,7 @@ int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { @@ -605,7 +605,7 @@ int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { @@ -616,7 +616,7 @@ int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { @@ -627,7 +627,7 @@ uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { @@ -638,7 +638,7 @@ uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b -// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { @@ -872,7 +872,7 @@ uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vbic_s8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { @@ -880,7 +880,7 @@ int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { } // CHECK-LABEL: @test_vbic_s16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { @@ -888,7 +888,7 @@ int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { } // CHECK-LABEL: @test_vbic_s32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { @@ -896,7 +896,7 @@ int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vbic_s64( -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { @@ -904,7 +904,7 @@ int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vbic_u8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { @@ -912,7 +912,7 @@ uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { } // CHECK-LABEL: @test_vbic_u16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { @@ -920,7 +920,7 @@ uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { } // CHECK-LABEL: @test_vbic_u32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { @@ -928,7 +928,7 @@ uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vbic_u64( -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { @@ -936,7 +936,7 @@ uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vbicq_s8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { @@ -944,7 +944,7 @@ int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { } // CHECK-LABEL: @test_vbicq_s16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { @@ -952,7 +952,7 @@ int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { } // CHECK-LABEL: @test_vbicq_s32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { @@ -960,7 +960,7 @@ int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vbicq_s64( -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { @@ -968,7 +968,7 @@ int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vbicq_u8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { @@ -976,7 +976,7 @@ uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { } // CHECK-LABEL: @test_vbicq_u16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { @@ -984,7 +984,7 @@ uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { } // CHECK-LABEL: @test_vbicq_u32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { @@ -992,7 +992,7 @@ uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vbicq_u64( -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { @@ -8414,98 +8414,98 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { } // CHECK-LABEL: @test_vmvn_s8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] int8x8_t test_vmvn_s8(int8x8_t a) { return vmvn_s8(a); } // CHECK-LABEL: @test_vmvn_s16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1) // CHECK: ret <4 x i16> [[NEG_I]] int16x4_t test_vmvn_s16(int16x4_t a) { return vmvn_s16(a); } // CHECK-LABEL: @test_vmvn_s32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1) // CHECK: ret <2 x i32> [[NEG_I]] int32x2_t test_vmvn_s32(int32x2_t a) { return vmvn_s32(a); } // CHECK-LABEL: @test_vmvn_u8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] uint8x8_t test_vmvn_u8(uint8x8_t a) { return vmvn_u8(a); } // CHECK-LABEL: @test_vmvn_u16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1) // CHECK: ret <4 x i16> [[NEG_I]] uint16x4_t test_vmvn_u16(uint16x4_t a) { return vmvn_u16(a); } // CHECK-LABEL: @test_vmvn_u32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1) // CHECK: ret <2 x i32> [[NEG_I]] uint32x2_t test_vmvn_u32(uint32x2_t a) { return vmvn_u32(a); } // CHECK-LABEL: @test_vmvn_p8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1) // CHECK: ret <8 x i8> [[NEG_I]] poly8x8_t test_vmvn_p8(poly8x8_t a) { return vmvn_p8(a); } // CHECK-LABEL: @test_vmvnq_s8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] int8x16_t test_vmvnq_s8(int8x16_t a) { return vmvnq_s8(a); } // CHECK-LABEL: @test_vmvnq_s16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1) // CHECK: ret <8 x i16> [[NEG_I]] int16x8_t test_vmvnq_s16(int16x8_t a) { return vmvnq_s16(a); } // CHECK-LABEL: @test_vmvnq_s32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1) // CHECK: ret <4 x i32> [[NEG_I]] int32x4_t test_vmvnq_s32(int32x4_t a) { return vmvnq_s32(a); } // CHECK-LABEL: @test_vmvnq_u8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] uint8x16_t test_vmvnq_u8(uint8x16_t a) { return vmvnq_u8(a); } // CHECK-LABEL: @test_vmvnq_u16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1) // CHECK: ret <8 x i16> [[NEG_I]] uint16x8_t test_vmvnq_u16(uint16x8_t a) { return vmvnq_u16(a); } // CHECK-LABEL: @test_vmvnq_u32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1) // CHECK: ret <4 x i32> [[NEG_I]] uint32x4_t test_vmvnq_u32(uint32x4_t a) { return vmvnq_u32(a); } // CHECK-LABEL: @test_vmvnq_p8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1) // CHECK: ret <16 x i8> [[NEG_I]] poly8x16_t test_vmvnq_p8(poly8x16_t a) { return vmvnq_p8(a); @@ -8568,7 +8568,7 @@ float32x4_t test_vnegq_f32(float32x4_t a) { } // CHECK-LABEL: @test_vorn_s8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { @@ -8576,7 +8576,7 @@ int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { } // CHECK-LABEL: @test_vorn_s16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { @@ -8584,7 +8584,7 @@ int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { } // CHECK-LABEL: @test_vorn_s32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { @@ -8592,7 +8592,7 @@ int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { } // CHECK-LABEL: @test_vorn_s64( -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { @@ -8600,7 +8600,7 @@ int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vorn_u8( -// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { @@ -8608,7 +8608,7 @@ uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { } // CHECK-LABEL: @test_vorn_u16( -// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { @@ -8616,7 +8616,7 @@ uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { } // CHECK-LABEL: @test_vorn_u32( -// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { @@ -8624,7 +8624,7 @@ uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { } // CHECK-LABEL: @test_vorn_u64( -// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { @@ -8632,7 +8632,7 @@ uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vornq_s8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { @@ -8640,7 +8640,7 @@ int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { } // CHECK-LABEL: @test_vornq_s16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { @@ -8648,7 +8648,7 @@ int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { } // CHECK-LABEL: @test_vornq_s32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { @@ -8656,7 +8656,7 @@ int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { } // CHECK-LABEL: @test_vornq_s64( -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { @@ -8664,7 +8664,7 @@ int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vornq_u8( -// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, +// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1) // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { @@ -8672,7 +8672,7 @@ uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { } // CHECK-LABEL: @test_vornq_u16( -// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, +// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1) // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { @@ -8680,7 +8680,7 @@ uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { } // CHECK-LABEL: @test_vornq_u32( -// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, +// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1) // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { @@ -8688,7 +8688,7 @@ uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { } // CHECK-LABEL: @test_vornq_u64( -// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, +// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1) // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) { @@ -10200,7 +10200,7 @@ uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vqrshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQRSHRN_N1]] int8x8_t test_vqrshrn_n_s16(int16x8_t a) { return vqrshrn_n_s16(a, 1); @@ -10209,7 +10209,7 @@ int8x8_t test_vqrshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqrshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQRSHRN_N1]] int16x4_t test_vqrshrn_n_s32(int32x4_t a) { return vqrshrn_n_s32(a, 1); @@ -10218,7 +10218,7 @@ int16x4_t test_vqrshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqrshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQRSHRN_N1]] int32x2_t test_vqrshrn_n_s64(int64x2_t a) { return vqrshrn_n_s64(a, 1); @@ -10227,7 +10227,7 @@ int32x2_t test_vqrshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vqrshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQRSHRN_N1]] uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { return vqrshrn_n_u16(a, 1); @@ -10236,7 +10236,7 @@ uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vqrshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQRSHRN_N1]] uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { return vqrshrn_n_u32(a, 1); @@ -10245,7 +10245,7 @@ uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vqrshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> ) +// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQRSHRN_N1]] uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { return vqrshrn_n_u64(a, 1); @@ -10254,7 +10254,7 @@ uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqrshrun_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> ) +// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQRSHRUN_N1]] uint8x8_t test_vqrshrun_n_s16(int16x8_t a) { return vqrshrun_n_s16(a, 1); @@ -10263,7 +10263,7 @@ uint8x8_t test_vqrshrun_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqrshrun_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> ) +// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQRSHRUN_N1]] uint16x4_t test_vqrshrun_n_s32(int32x4_t a) { return vqrshrun_n_s32(a, 1); @@ -10272,7 +10272,7 @@ uint16x4_t test_vqrshrun_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqrshrun_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> ) +// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQRSHRUN_N1]] uint32x2_t test_vqrshrun_n_s64(int64x2_t a) { return vqrshrun_n_s64(a, 1); @@ -10427,7 +10427,7 @@ uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqshlu_n_s8( -// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 1) // CHECK: ret <8 x i8> [[VQSHLU_N]] uint8x8_t test_vqshlu_n_s8(int8x8_t a) { return vqshlu_n_s8(a, 1); @@ -10436,7 +10436,7 @@ uint8x8_t test_vqshlu_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vqshlu_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 1) // CHECK: ret <4 x i16> [[VQSHLU_N1]] uint16x4_t test_vqshlu_n_s16(int16x4_t a) { return vqshlu_n_s16(a, 1); @@ -10445,7 +10445,7 @@ uint16x4_t test_vqshlu_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vqshlu_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VQSHLU_N1]] uint32x2_t test_vqshlu_n_s32(int32x2_t a) { return vqshlu_n_s32(a, 1); @@ -10454,14 +10454,14 @@ uint32x2_t test_vqshlu_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vqshlu_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHLU_N1]] uint64x1_t test_vqshlu_n_s64(int64x1_t a) { return vqshlu_n_s64(a, 1); } // CHECK-LABEL: @test_vqshluq_n_s8( -// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VQSHLU_N]] uint8x16_t test_vqshluq_n_s8(int8x16_t a) { return vqshluq_n_s8(a, 1); @@ -10470,7 +10470,7 @@ uint8x16_t test_vqshluq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vqshluq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VQSHLU_N1]] uint16x8_t test_vqshluq_n_s16(int16x8_t a) { return vqshluq_n_s16(a, 1); @@ -10479,7 +10479,7 @@ uint16x8_t test_vqshluq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshluq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VQSHLU_N1]] uint32x4_t test_vqshluq_n_s32(int32x4_t a) { return vqshluq_n_s32(a, 1); @@ -10488,14 +10488,14 @@ uint32x4_t test_vqshluq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshluq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> ) +// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VQSHLU_N1]] uint64x2_t test_vqshluq_n_s64(int64x2_t a) { return vqshluq_n_s64(a, 1); } // CHECK-LABEL: @test_vqshl_n_s8( -// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VQSHL_N]] int8x8_t test_vqshl_n_s8(int8x8_t a) { return vqshl_n_s8(a, 1); @@ -10504,7 +10504,7 @@ int8x8_t test_vqshl_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vqshl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> ) +// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VQSHL_N1]] int16x4_t test_vqshl_n_s16(int16x4_t a) { return vqshl_n_s16(a, 1); @@ -10513,7 +10513,7 @@ int16x4_t test_vqshl_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vqshl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> ) +// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VQSHL_N1]] int32x2_t test_vqshl_n_s32(int32x2_t a) { return vqshl_n_s32(a, 1); @@ -10522,14 +10522,14 @@ int32x2_t test_vqshl_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vqshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHL_N1]] int64x1_t test_vqshl_n_s64(int64x1_t a) { return vqshl_n_s64(a, 1); } // CHECK-LABEL: @test_vqshl_n_u8( -// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VQSHL_N]] uint8x8_t test_vqshl_n_u8(uint8x8_t a) { return vqshl_n_u8(a, 1); @@ -10538,7 +10538,7 @@ uint8x8_t test_vqshl_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vqshl_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> ) +// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VQSHL_N1]] uint16x4_t test_vqshl_n_u16(uint16x4_t a) { return vqshl_n_u16(a, 1); @@ -10547,7 +10547,7 @@ uint16x4_t test_vqshl_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vqshl_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> ) +// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VQSHL_N1]] uint32x2_t test_vqshl_n_u32(uint32x2_t a) { return vqshl_n_u32(a, 1); @@ -10556,14 +10556,14 @@ uint32x2_t test_vqshl_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vqshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VQSHL_N1]] uint64x1_t test_vqshl_n_u64(uint64x1_t a) { return vqshl_n_u64(a, 1); } // CHECK-LABEL: @test_vqshlq_n_s8( -// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VQSHL_N]] int8x16_t test_vqshlq_n_s8(int8x16_t a) { return vqshlq_n_s8(a, 1); @@ -10572,7 +10572,7 @@ int8x16_t test_vqshlq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vqshlq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> ) +// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VQSHL_N1]] int16x8_t test_vqshlq_n_s16(int16x8_t a) { return vqshlq_n_s16(a, 1); @@ -10581,7 +10581,7 @@ int16x8_t test_vqshlq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshlq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> ) +// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VQSHL_N1]] int32x4_t test_vqshlq_n_s32(int32x4_t a) { return vqshlq_n_s32(a, 1); @@ -10590,14 +10590,14 @@ int32x4_t test_vqshlq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshlq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VQSHL_N1]] int64x2_t test_vqshlq_n_s64(int64x2_t a) { return vqshlq_n_s64(a, 1); } // CHECK-LABEL: @test_vqshlq_n_u8( -// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VQSHL_N]] uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { return vqshlq_n_u8(a, 1); @@ -10606,7 +10606,7 @@ uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vqshlq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> ) +// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VQSHL_N1]] uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { return vqshlq_n_u16(a, 1); @@ -10615,7 +10615,7 @@ uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vqshlq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> ) +// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VQSHL_N1]] uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { return vqshlq_n_u32(a, 1); @@ -10624,7 +10624,7 @@ uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vqshlq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> ) +// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VQSHL_N1]] uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { return vqshlq_n_u64(a, 1); @@ -10633,7 +10633,7 @@ uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQSHRN_N1]] int8x8_t test_vqshrn_n_s16(int16x8_t a) { return vqshrn_n_s16(a, 1); @@ -10642,7 +10642,7 @@ int8x8_t test_vqshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQSHRN_N1]] int16x4_t test_vqshrn_n_s32(int32x4_t a) { return vqshrn_n_s32(a, 1); @@ -10651,7 +10651,7 @@ int16x4_t test_vqshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQSHRN_N1]] int32x2_t test_vqshrn_n_s64(int64x2_t a) { return vqshrn_n_s64(a, 1); @@ -10660,7 +10660,7 @@ int32x2_t test_vqshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vqshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQSHRN_N1]] uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { return vqshrn_n_u16(a, 1); @@ -10669,7 +10669,7 @@ uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vqshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQSHRN_N1]] uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { return vqshrn_n_u32(a, 1); @@ -10678,7 +10678,7 @@ uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vqshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> ) +// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQSHRN_N1]] uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { return vqshrn_n_u64(a, 1); @@ -10687,7 +10687,7 @@ uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vqshrun_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> ) +// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VQSHRUN_N1]] uint8x8_t test_vqshrun_n_s16(int16x8_t a) { return vqshrun_n_s16(a, 1); @@ -10696,7 +10696,7 @@ uint8x8_t test_vqshrun_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vqshrun_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> ) +// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VQSHRUN_N1]] uint16x4_t test_vqshrun_n_s32(int32x4_t a) { return vqshrun_n_s32(a, 1); @@ -10705,7 +10705,7 @@ uint16x4_t test_vqshrun_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vqshrun_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> ) +// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VQSHRUN_N1]] uint32x2_t test_vqshrun_n_s64(int64x2_t a) { return vqshrun_n_s64(a, 1); @@ -13296,7 +13296,7 @@ uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vrshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VRSHRN_N1]] int8x8_t test_vrshrn_n_s16(int16x8_t a) { return vrshrn_n_s16(a, 1); @@ -13305,7 +13305,7 @@ int8x8_t test_vrshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vrshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VRSHRN_N1]] int16x4_t test_vrshrn_n_s32(int32x4_t a) { return vrshrn_n_s32(a, 1); @@ -13314,7 +13314,7 @@ int16x4_t test_vrshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vrshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VRSHRN_N1]] int32x2_t test_vrshrn_n_s64(int64x2_t a) { return vrshrn_n_s64(a, 1); @@ -13323,7 +13323,7 @@ int32x2_t test_vrshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vrshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i8> [[VRSHRN_N1]] uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { return vrshrn_n_u16(a, 1); @@ -13332,7 +13332,7 @@ uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vrshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i16> [[VRSHRN_N1]] uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { return vrshrn_n_u32(a, 1); @@ -13341,14 +13341,14 @@ uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vrshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> ) +// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i32> [[VRSHRN_N1]] uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { return vrshrn_n_u64(a, 1); } // CHECK-LABEL: @test_vrshr_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VRSHR_N]] int8x8_t test_vrshr_n_s8(int8x8_t a) { return vrshr_n_s8(a, 1); @@ -13357,7 +13357,7 @@ int8x8_t test_vrshr_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vrshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VRSHR_N1]] int16x4_t test_vrshr_n_s16(int16x4_t a) { return vrshr_n_s16(a, 1); @@ -13366,7 +13366,7 @@ int16x4_t test_vrshr_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vrshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -1)) // CHECK: ret <2 x i32> [[VRSHR_N1]] int32x2_t test_vrshr_n_s32(int32x2_t a) { return vrshr_n_s32(a, 1); @@ -13375,14 +13375,14 @@ int32x2_t test_vrshr_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vrshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VRSHR_N1]] int64x1_t test_vrshr_n_s64(int64x1_t a) { return vrshr_n_s64(a, 1); } // CHECK-LABEL: @test_vrshr_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VRSHR_N]] uint8x8_t test_vrshr_n_u8(uint8x8_t a) { return vrshr_n_u8(a, 1); @@ -13391,7 +13391,7 @@ uint8x8_t test_vrshr_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vrshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VRSHR_N1]] uint16x4_t test_vrshr_n_u16(uint16x4_t a) { return vrshr_n_u16(a, 1); @@ -13400,7 +13400,7 @@ uint16x4_t test_vrshr_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vrshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -1)) // CHECK: ret <2 x i32> [[VRSHR_N1]] uint32x2_t test_vrshr_n_u32(uint32x2_t a) { return vrshr_n_u32(a, 1); @@ -13409,14 +13409,14 @@ uint32x2_t test_vrshr_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vrshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VRSHR_N1]] uint64x1_t test_vrshr_n_u64(uint64x1_t a) { return vrshr_n_u64(a, 1); } // CHECK-LABEL: @test_vrshrq_n_s8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VRSHR_N]] int8x16_t test_vrshrq_n_s8(int8x16_t a) { return vrshrq_n_s8(a, 1); @@ -13425,7 +13425,7 @@ int8x16_t test_vrshrq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vrshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VRSHR_N1]] int16x8_t test_vrshrq_n_s16(int16x8_t a) { return vrshrq_n_s16(a, 1); @@ -13434,7 +13434,7 @@ int16x8_t test_vrshrq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vrshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i32> [[VRSHR_N1]] int32x4_t test_vrshrq_n_s32(int32x4_t a) { return vrshrq_n_s32(a, 1); @@ -13443,14 +13443,14 @@ int32x4_t test_vrshrq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vrshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i64> [[VRSHR_N1]] int64x2_t test_vrshrq_n_s64(int64x2_t a) { return vrshrq_n_s64(a, 1); } // CHECK-LABEL: @test_vrshrq_n_u8( -// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> ) +// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VRSHR_N]] uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { return vrshrq_n_u8(a, 1); @@ -13459,7 +13459,7 @@ uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vrshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VRSHR_N1]] uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { return vrshrq_n_u16(a, 1); @@ -13468,7 +13468,7 @@ uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vrshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i32> [[VRSHR_N1]] uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { return vrshrq_n_u32(a, 1); @@ -13477,7 +13477,7 @@ uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vrshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i64> [[VRSHR_N1]] uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { return vrshrq_n_u64(a, 1); @@ -13536,7 +13536,7 @@ float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) { } // CHECK-LABEL: @test_vrsra_n_s8( -// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]] // CHECK: ret <8 x i8> [[VRSRA_N]] int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { @@ -13548,7 +13548,7 @@ int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> ) +// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> splat (i16 -1)) // CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]] // CHECK: ret <4 x i16> [[VRSRA_N]] int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { @@ -13560,7 +13560,7 @@ int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> ) +// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> splat (i32 -1)) // CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]] // CHECK: ret <2 x i32> [[VRSRA_N]] int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { @@ -13572,7 +13572,7 @@ int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> ) +// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> splat (i64 -1)) // CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]] // CHECK: ret <1 x i64> [[VRSRA_N]] int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { @@ -13580,7 +13580,7 @@ int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vrsra_n_u8( -// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> ) +// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]] // CHECK: ret <8 x i8> [[VRSRA_N]] uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { @@ -13592,7 +13592,7 @@ uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> ) +// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> splat (i16 -1)) // CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]] // CHECK: ret <4 x i16> [[VRSRA_N]] uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { @@ -13604,7 +13604,7 @@ uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> ) +// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> splat (i32 -1)) // CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]] // CHECK: ret <2 x i32> [[VRSRA_N]] uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { @@ -13616,7 +13616,7 @@ uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> ) +// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> splat (i64 -1)) // CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]] // CHECK: ret <1 x i64> [[VRSRA_N]] uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { @@ -13624,7 +13624,7 @@ uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vrsraq_n_s8( -// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]] // CHECK: ret <16 x i8> [[VRSRA_N]] int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { @@ -13636,7 +13636,7 @@ int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> ) +// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> splat (i16 -1)) // CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]] // CHECK: ret <8 x i16> [[VRSRA_N]] int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { @@ -13648,7 +13648,7 @@ int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> ) +// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> splat (i32 -1)) // CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]] // CHECK: ret <4 x i32> [[VRSRA_N]] int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { @@ -13660,7 +13660,7 @@ int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> ) +// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> splat (i64 -1)) // CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] // CHECK: ret <2 x i64> [[VRSRA_N]] int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { @@ -13668,7 +13668,7 @@ int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vrsraq_n_u8( -// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> ) +// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]] // CHECK: ret <16 x i8> [[VRSRA_N]] uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { @@ -13680,7 +13680,7 @@ uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> ) +// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> splat (i16 -1)) // CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]] // CHECK: ret <8 x i16> [[VRSRA_N]] uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { @@ -13692,7 +13692,7 @@ uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> ) +// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> splat (i32 -1)) // CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]] // CHECK: ret <4 x i32> [[VRSRA_N]] uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { @@ -13704,7 +13704,7 @@ uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> ) +// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> splat (i64 -1)) // CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]] // CHECK: ret <2 x i64> [[VRSRA_N]] uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) { @@ -14107,7 +14107,7 @@ uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vshll_n_s8( // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHLL_N]] int16x8_t test_vshll_n_s8(int8x8_t a) { return vshll_n_s8(a, 1); @@ -14117,7 +14117,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHLL_N]] int32x4_t test_vshll_n_s16(int16x4_t a) { return vshll_n_s16(a, 1); @@ -14127,7 +14127,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHLL_N]] int64x2_t test_vshll_n_s32(int32x2_t a) { return vshll_n_s32(a, 1); @@ -14135,7 +14135,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshll_n_u8( // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> -// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHLL_N]] uint16x8_t test_vshll_n_u8(uint8x8_t a) { return vshll_n_u8(a, 1); @@ -14145,7 +14145,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHLL_N]] uint32x4_t test_vshll_n_u16(uint16x4_t a) { return vshll_n_u16(a, 1); @@ -14155,14 +14155,14 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHLL_N]] uint64x2_t test_vshll_n_u32(uint32x2_t a) { return vshll_n_u32(a, 1); } // CHECK-LABEL: @test_vshl_n_s8( -// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 1) // CHECK: ret <8 x i8> [[VSHL_N]] int8x8_t test_vshl_n_s8(int8x8_t a) { return vshl_n_s8(a, 1); @@ -14171,7 +14171,7 @@ int8x8_t test_vshl_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vshl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <4 x i16> [[VSHL_N]] int16x4_t test_vshl_n_s16(int16x4_t a) { return vshl_n_s16(a, 1); @@ -14180,7 +14180,7 @@ int16x4_t test_vshl_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vshl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <2 x i32> [[VSHL_N]] int32x2_t test_vshl_n_s32(int32x2_t a) { return vshl_n_s32(a, 1); @@ -14189,14 +14189,14 @@ int32x2_t test_vshl_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHL_N]] int64x1_t test_vshl_n_s64(int64x1_t a) { return vshl_n_s64(a, 1); } // CHECK-LABEL: @test_vshl_n_u8( -// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 1) // CHECK: ret <8 x i8> [[VSHL_N]] uint8x8_t test_vshl_n_u8(uint8x8_t a) { return vshl_n_u8(a, 1); @@ -14205,7 +14205,7 @@ uint8x8_t test_vshl_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vshl_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <4 x i16> [[VSHL_N]] uint16x4_t test_vshl_n_u16(uint16x4_t a) { return vshl_n_u16(a, 1); @@ -14214,7 +14214,7 @@ uint16x4_t test_vshl_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vshl_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <2 x i32> [[VSHL_N]] uint32x2_t test_vshl_n_u32(uint32x2_t a) { return vshl_n_u32(a, 1); @@ -14223,14 +14223,14 @@ uint32x2_t test_vshl_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHL_N]] uint64x1_t test_vshl_n_u64(uint64x1_t a) { return vshl_n_u64(a, 1); } // CHECK-LABEL: @test_vshlq_n_s8( -// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 1) // CHECK: ret <16 x i8> [[VSHL_N]] int8x16_t test_vshlq_n_s8(int8x16_t a) { return vshlq_n_s8(a, 1); @@ -14239,7 +14239,7 @@ int8x16_t test_vshlq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vshlq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHL_N]] int16x8_t test_vshlq_n_s16(int16x8_t a) { return vshlq_n_s16(a, 1); @@ -14248,7 +14248,7 @@ int16x8_t test_vshlq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshlq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHL_N]] int32x4_t test_vshlq_n_s32(int32x4_t a) { return vshlq_n_s32(a, 1); @@ -14257,14 +14257,14 @@ int32x4_t test_vshlq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshlq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHL_N]] int64x2_t test_vshlq_n_s64(int64x2_t a) { return vshlq_n_s64(a, 1); } // CHECK-LABEL: @test_vshlq_n_u8( -// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 1) // CHECK: ret <16 x i8> [[VSHL_N]] uint8x16_t test_vshlq_n_u8(uint8x16_t a) { return vshlq_n_u8(a, 1); @@ -14273,7 +14273,7 @@ uint8x16_t test_vshlq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vshlq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHL_N]] uint16x8_t test_vshlq_n_u16(uint16x8_t a) { return vshlq_n_u16(a, 1); @@ -14282,7 +14282,7 @@ uint16x8_t test_vshlq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshlq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHL_N]] uint32x4_t test_vshlq_n_u32(uint32x4_t a) { return vshlq_n_u32(a, 1); @@ -14291,7 +14291,7 @@ uint32x4_t test_vshlq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshlq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHL_N]] uint64x2_t test_vshlq_n_u64(uint64x2_t a) { return vshlq_n_u64(a, 1); @@ -14300,7 +14300,7 @@ uint64x2_t test_vshlq_n_u64(uint64x2_t a) { // CHECK-LABEL: @test_vshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: ret <8 x i8> [[VSHRN_N]] int8x8_t test_vshrn_n_s16(int16x8_t a) { @@ -14310,7 +14310,7 @@ int8x8_t test_vshrn_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: ret <4 x i16> [[VSHRN_N]] int16x4_t test_vshrn_n_s32(int32x4_t a) { @@ -14320,7 +14320,7 @@ int16x4_t test_vshrn_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: ret <2 x i32> [[VSHRN_N]] int32x2_t test_vshrn_n_s64(int64x2_t a) { @@ -14330,7 +14330,7 @@ int32x2_t test_vshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> // CHECK: ret <8 x i8> [[VSHRN_N]] uint8x8_t test_vshrn_n_u16(uint16x8_t a) { @@ -14340,7 +14340,7 @@ uint8x8_t test_vshrn_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> // CHECK: ret <4 x i16> [[VSHRN_N]] uint16x4_t test_vshrn_n_u32(uint32x4_t a) { @@ -14350,7 +14350,7 @@ uint16x4_t test_vshrn_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> // CHECK: ret <2 x i32> [[VSHRN_N]] uint32x2_t test_vshrn_n_u64(uint64x2_t a) { @@ -14358,7 +14358,7 @@ uint32x2_t test_vshrn_n_u64(uint64x2_t a) { } // CHECK-LABEL: @test_vshr_n_s8( -// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, splat (i8 1) // CHECK: ret <8 x i8> [[VSHR_N]] int8x8_t test_vshr_n_s8(int8x8_t a) { return vshr_n_s8(a, 1); @@ -14367,7 +14367,7 @@ int8x8_t test_vshr_n_s8(int8x8_t a) { // CHECK-LABEL: @test_vshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <4 x i16> [[VSHR_N]] int16x4_t test_vshr_n_s16(int16x4_t a) { return vshr_n_s16(a, 1); @@ -14376,7 +14376,7 @@ int16x4_t test_vshr_n_s16(int16x4_t a) { // CHECK-LABEL: @test_vshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <2 x i32> [[VSHR_N]] int32x2_t test_vshr_n_s32(int32x2_t a) { return vshr_n_s32(a, 1); @@ -14385,14 +14385,14 @@ int32x2_t test_vshr_n_s32(int32x2_t a) { // CHECK-LABEL: @test_vshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHR_N]] int64x1_t test_vshr_n_s64(int64x1_t a) { return vshr_n_s64(a, 1); } // CHECK-LABEL: @test_vshr_n_u8( -// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, splat (i8 1) // CHECK: ret <8 x i8> [[VSHR_N]] uint8x8_t test_vshr_n_u8(uint8x8_t a) { return vshr_n_u8(a, 1); @@ -14401,7 +14401,7 @@ uint8x8_t test_vshr_n_u8(uint8x8_t a) { // CHECK-LABEL: @test_vshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <4 x i16> [[VSHR_N]] uint16x4_t test_vshr_n_u16(uint16x4_t a) { return vshr_n_u16(a, 1); @@ -14410,7 +14410,7 @@ uint16x4_t test_vshr_n_u16(uint16x4_t a) { // CHECK-LABEL: @test_vshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <2 x i32> [[VSHR_N]] uint32x2_t test_vshr_n_u32(uint32x2_t a) { return vshr_n_u32(a, 1); @@ -14419,14 +14419,14 @@ uint32x2_t test_vshr_n_u32(uint32x2_t a) { // CHECK-LABEL: @test_vshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <1 x i64> [[VSHR_N]] uint64x1_t test_vshr_n_u64(uint64x1_t a) { return vshr_n_u64(a, 1); } // CHECK-LABEL: @test_vshrq_n_s8( -// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, splat (i8 1) // CHECK: ret <16 x i8> [[VSHR_N]] int8x16_t test_vshrq_n_s8(int8x16_t a) { return vshrq_n_s8(a, 1); @@ -14435,7 +14435,7 @@ int8x16_t test_vshrq_n_s8(int8x16_t a) { // CHECK-LABEL: @test_vshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHR_N]] int16x8_t test_vshrq_n_s16(int16x8_t a) { return vshrq_n_s16(a, 1); @@ -14444,7 +14444,7 @@ int16x8_t test_vshrq_n_s16(int16x8_t a) { // CHECK-LABEL: @test_vshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHR_N]] int32x4_t test_vshrq_n_s32(int32x4_t a) { return vshrq_n_s32(a, 1); @@ -14453,14 +14453,14 @@ int32x4_t test_vshrq_n_s32(int32x4_t a) { // CHECK-LABEL: @test_vshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHR_N]] int64x2_t test_vshrq_n_s64(int64x2_t a) { return vshrq_n_s64(a, 1); } // CHECK-LABEL: @test_vshrq_n_u8( -// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, +// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, splat (i8 1) // CHECK: ret <16 x i8> [[VSHR_N]] uint8x16_t test_vshrq_n_u8(uint8x16_t a) { return vshrq_n_u8(a, 1); @@ -14469,7 +14469,7 @@ uint8x16_t test_vshrq_n_u8(uint8x16_t a) { // CHECK-LABEL: @test_vshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) // CHECK: ret <8 x i16> [[VSHR_N]] uint16x8_t test_vshrq_n_u16(uint16x8_t a) { return vshrq_n_u16(a, 1); @@ -14478,7 +14478,7 @@ uint16x8_t test_vshrq_n_u16(uint16x8_t a) { // CHECK-LABEL: @test_vshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) // CHECK: ret <4 x i32> [[VSHR_N]] uint32x4_t test_vshrq_n_u32(uint32x4_t a) { return vshrq_n_u32(a, 1); @@ -14487,14 +14487,14 @@ uint32x4_t test_vshrq_n_u32(uint32x4_t a) { // CHECK-LABEL: @test_vshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], +// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) // CHECK: ret <2 x i64> [[VSHR_N]] uint64x2_t test_vshrq_n_u64(uint64x2_t a) { return vshrq_n_u64(a, 1); } // CHECK-LABEL: @test_vsli_n_s8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VSLI_N]] int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { return vsli_n_s8(a, b, 1); @@ -14505,7 +14505,7 @@ int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VSLI_N2]] int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { return vsli_n_s16(a, b, 1); @@ -14516,7 +14516,7 @@ int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VSLI_N2]] int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { return vsli_n_s32(a, b, 1); @@ -14527,14 +14527,14 @@ int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VSLI_N2]] int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { return vsli_n_s64(a, b, 1); } // CHECK-LABEL: @test_vsli_n_u8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VSLI_N]] uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { return vsli_n_u8(a, b, 1); @@ -14545,7 +14545,7 @@ uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VSLI_N2]] uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { return vsli_n_u16(a, b, 1); @@ -14556,7 +14556,7 @@ uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 1)) // CHECK: ret <2 x i32> [[VSLI_N2]] uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { return vsli_n_u32(a, b, 1); @@ -14567,14 +14567,14 @@ uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 1)) // CHECK: ret <1 x i64> [[VSLI_N2]] uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { return vsli_n_u64(a, b, 1); } // CHECK-LABEL: @test_vsli_n_p8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 1)) // CHECK: ret <8 x i8> [[VSLI_N]] poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { return vsli_n_p8(a, b, 1); @@ -14585,14 +14585,14 @@ poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 1)) // CHECK: ret <4 x i16> [[VSLI_N2]] poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { return vsli_n_p16(a, b, 1); } // CHECK-LABEL: @test_vsliq_n_s8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VSLI_N]] int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { return vsliq_n_s8(a, b, 1); @@ -14603,7 +14603,7 @@ int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VSLI_N2]] int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { return vsliq_n_s16(a, b, 1); @@ -14614,7 +14614,7 @@ int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VSLI_N2]] int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { return vsliq_n_s32(a, b, 1); @@ -14625,14 +14625,14 @@ int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VSLI_N2]] int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) { return vsliq_n_s64(a, b, 1); } // CHECK-LABEL: @test_vsliq_n_u8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VSLI_N]] uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { return vsliq_n_u8(a, b, 1); @@ -14643,7 +14643,7 @@ uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VSLI_N2]] uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { return vsliq_n_u16(a, b, 1); @@ -14654,7 +14654,7 @@ uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 1)) // CHECK: ret <4 x i32> [[VSLI_N2]] uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { return vsliq_n_u32(a, b, 1); @@ -14665,14 +14665,14 @@ uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 1)) // CHECK: ret <2 x i64> [[VSLI_N2]] uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) { return vsliq_n_u64(a, b, 1); } // CHECK-LABEL: @test_vsliq_n_p8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 1)) // CHECK: ret <16 x i8> [[VSLI_N]] poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { return vsliq_n_p8(a, b, 1); @@ -14683,14 +14683,14 @@ poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 1)) // CHECK: ret <8 x i16> [[VSLI_N2]] poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { return vsliq_n_p16(a, b, 1); } // CHECK-LABEL: @test_vsra_n_s8( -// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, splat (i8 1) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { @@ -14702,7 +14702,7 @@ int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 1) // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i16> [[TMP4]] int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { @@ -14714,7 +14714,7 @@ int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 1) // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i32> [[TMP4]] int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { @@ -14726,7 +14726,7 @@ int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <1 x i64> [[TMP4]] int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { @@ -14734,7 +14734,7 @@ int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vsra_n_u8( -// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, splat (i8 1) // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { @@ -14746,7 +14746,7 @@ uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 1) // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i16> [[TMP4]] uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { @@ -14758,7 +14758,7 @@ uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 1) // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i32> [[TMP4]] uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { @@ -14770,7 +14770,7 @@ uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <1 x i64> [[TMP4]] uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { @@ -14778,7 +14778,7 @@ uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vsraq_n_s8( -// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, splat (i8 1) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { @@ -14790,7 +14790,7 @@ int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 1) // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <8 x i16> [[TMP4]] int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { @@ -14802,7 +14802,7 @@ int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 1) // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i32> [[TMP4]] int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { @@ -14814,7 +14814,7 @@ int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i64> [[TMP4]] int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { @@ -14822,7 +14822,7 @@ int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vsraq_n_u8( -// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, +// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, splat (i8 1) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { @@ -14834,7 +14834,7 @@ uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 1) // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <8 x i16> [[TMP4]] uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { @@ -14846,7 +14846,7 @@ uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 1) // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i32> [[TMP4]] uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { @@ -14858,7 +14858,7 @@ uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], +// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 1) // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i64> [[TMP4]] uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { @@ -14866,7 +14866,7 @@ uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { } // CHECK-LABEL: @test_vsri_n_s8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VSLI_N]] int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { return vsri_n_s8(a, b, 1); @@ -14877,7 +14877,7 @@ int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VSLI_N2]] int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { return vsri_n_s16(a, b, 1); @@ -14888,7 +14888,7 @@ int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 -1)) // CHECK: ret <2 x i32> [[VSLI_N2]] int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { return vsri_n_s32(a, b, 1); @@ -14899,14 +14899,14 @@ int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VSLI_N2]] int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { return vsri_n_s64(a, b, 1); } // CHECK-LABEL: @test_vsri_n_u8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VSLI_N]] uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) { return vsri_n_u8(a, b, 1); @@ -14917,7 +14917,7 @@ uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VSLI_N2]] uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) { return vsri_n_u16(a, b, 1); @@ -14928,7 +14928,7 @@ uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> splat (i32 -1)) // CHECK: ret <2 x i32> [[VSLI_N2]] uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) { return vsri_n_u32(a, b, 1); @@ -14939,14 +14939,14 @@ uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> splat (i64 -1)) // CHECK: ret <1 x i64> [[VSLI_N2]] uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { return vsri_n_u64(a, b, 1); } // CHECK-LABEL: @test_vsri_n_p8( -// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> splat (i8 -1)) // CHECK: ret <8 x i8> [[VSLI_N]] poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { return vsri_n_p8(a, b, 1); @@ -14957,14 +14957,14 @@ poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> splat (i16 -1)) // CHECK: ret <4 x i16> [[VSLI_N2]] poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) { return vsri_n_p16(a, b, 1); } // CHECK-LABEL: @test_vsriq_n_s8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VSLI_N]] int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { return vsriq_n_s8(a, b, 1); @@ -14975,7 +14975,7 @@ int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VSLI_N2]] int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { return vsriq_n_s16(a, b, 1); @@ -14986,7 +14986,7 @@ int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i32> [[VSLI_N2]] int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { return vsriq_n_s32(a, b, 1); @@ -14997,14 +14997,14 @@ int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i64> [[VSLI_N2]] int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { return vsriq_n_s64(a, b, 1); } // CHECK-LABEL: @test_vsriq_n_u8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VSLI_N]] uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) { return vsriq_n_u8(a, b, 1); @@ -15015,7 +15015,7 @@ uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VSLI_N2]] uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) { return vsriq_n_u16(a, b, 1); @@ -15026,7 +15026,7 @@ uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> ) +// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> splat (i32 -1)) // CHECK: ret <4 x i32> [[VSLI_N2]] uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) { return vsriq_n_u32(a, b, 1); @@ -15037,14 +15037,14 @@ uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> ) +// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> splat (i64 -1)) // CHECK: ret <2 x i64> [[VSLI_N2]] uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) { return vsriq_n_u64(a, b, 1); } // CHECK-LABEL: @test_vsriq_n_p8( -// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 -1)) // CHECK: ret <16 x i8> [[VSLI_N]] poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { return vsriq_n_p8(a, b, 1); @@ -15055,7 +15055,7 @@ poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> ) +// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> splat (i16 -1)) // CHECK: ret <8 x i16> [[VSLI_N2]] poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) { return vsriq_n_p16(a, b, 1); @@ -18620,7 +18620,7 @@ uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { @@ -18631,7 +18631,7 @@ int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { @@ -18642,7 +18642,7 @@ int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { @@ -18653,7 +18653,7 @@ int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { @@ -18664,7 +18664,7 @@ uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { @@ -18675,7 +18675,7 @@ uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b -// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index 2850125a54c44a..748b5e4add7c76 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -962,7 +962,7 @@ void test_builtin_elementwise_copysign(float f1, float f2, double d1, double d2, f1 = __builtin_elementwise_copysign(2.0f, f1); // CHECK: [[V2F64:%.+]] = load <2 x double>, ptr %v2f64.addr, align 16 - // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> , <2 x double> [[V2F64]]) + // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 1.000000e+00), <2 x double> [[V2F64]]) v2f64 = __builtin_elementwise_copysign((double2)1.0, v2f64); } @@ -1035,7 +1035,7 @@ void test_builtin_elementwise_fma(float f32, double f64, // CHECK: [[V2F16_0:%.+]] = load <2 x half>, ptr %v2f16.addr // CHECK-NEXT: [[V2F16_1:%.+]] = load <2 x half>, ptr %v2f16.addr - // CHECK-NEXT: call <2 x half> @llvm.fma.v2f16(<2 x half> [[V2F16_0]], <2 x half> [[V2F16_1]], <2 x half> ) + // CHECK-NEXT: call <2 x half> @llvm.fma.v2f16(<2 x half> [[V2F16_0]], <2 x half> [[V2F16_1]], <2 x half> splat (half 0xH4400)) half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0); } diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 3406cbdde2bf88..163aee4799ff0e 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -999,13 +999,13 @@ __device__ void nvvm_cvt_sm89() { // CHECK_PTX81_SM89: call i16 @llvm.nvvm.ff.to.e5m2x2.rn.relu(float 1.000000e+00, float 1.000000e+00) __nvvm_ff_to_e5m2x2_rn_relu(1.0f, 1.0f); - // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn(<2 x half> ) + // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn(<2 x half> splat (half 0xH3C00)) __nvvm_f16x2_to_e4m3x2_rn({1.0f16, 1.0f16}); - // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn.relu(<2 x half> ) + // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn.relu(<2 x half> splat (half 0xH3C00)) __nvvm_f16x2_to_e4m3x2_rn_relu({1.0f16, 1.0f16}); - // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn(<2 x half> ) + // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn(<2 x half> splat (half 0xH3C00)) __nvvm_f16x2_to_e5m2x2_rn({1.0f16, 1.0f16}); - // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn.relu(<2 x half> ) + // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn.relu(<2 x half> splat (half 0xH3C00)) __nvvm_f16x2_to_e5m2x2_rn_relu({1.0f16, 1.0f16}); // CHECK_PTX81_SM89: call <2 x half> @llvm.nvvm.e4m3x2.to.f16x2.rn(i16 18504) @@ -1035,12 +1035,12 @@ __device__ void nvvm_abs_neg_bf16_bf16x2_sm80() { // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.abs.bf16(bfloat 0xR3DCD) __nvvm_abs_bf16(BF16); - // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.abs.bf16x2(<2 x bfloat> ) + // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.abs.bf16x2(<2 x bfloat> splat (bfloat 0xR3DCD)) __nvvm_abs_bf16x2(BF16X2); // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.neg.bf16(bfloat 0xR3DCD) __nvvm_neg_bf16(BF16); - // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.neg.bf16x2(<2 x bfloat> ) + // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.neg.bf16x2(<2 x bfloat> splat (bfloat 0xR3DCD)) __nvvm_neg_bf16x2(BF16X2); #endif // CHECK: ret void diff --git a/clang/test/CodeGen/builtinshufflevector2.c b/clang/test/CodeGen/builtinshufflevector2.c index da957503f86b65..3d3ababd167c4f 100644 --- a/clang/test/CodeGen/builtinshufflevector2.c +++ b/clang/test/CodeGen/builtinshufflevector2.c @@ -5,7 +5,7 @@ typedef unsigned int uint4 __attribute__((ext_vector_type(4))); // CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v( void clang_shufflevector_v_v( float4* A, float4 x, uint4 mask ) { -// CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, +// CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, splat (i32 3) // CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i{{[0-9]+}} 0 // CHECK: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i{{[0-9]+}} [[I]] // diff --git a/clang/test/CodeGen/const-init.c b/clang/test/CodeGen/const-init.c index fc973cb983a80a..175d221ad410a6 100644 --- a/clang/test/CodeGen/const-init.c +++ b/clang/test/CodeGen/const-init.c @@ -139,7 +139,7 @@ void g28(void) { typedef long long v1i64 __attribute((vector_size(8))); typedef short v12i16 __attribute((vector_size(24))); typedef long double v2f80 __attribute((vector_size(24))); - // CHECK: @g28.a = internal global <1 x i64> + // CHECK: @g28.a = internal global <1 x i64> splat (i64 10) // @g28.b = internal global <12 x i16> // @g28.c = internal global <2 x x86_fp80> , align 32 static v1i64 a = (v1i64)10LL; diff --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c index c5886858515483..dde9857921cd12 100644 --- a/clang/test/CodeGen/matrix-type-operators.c +++ b/clang/test/CodeGen/matrix-type-operators.c @@ -790,7 +790,7 @@ void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) { // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], +// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // @@ -804,7 +804,7 @@ void multiply_float_matrix_constant(fx2x3_t a) { // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], +// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void void multiply_compound_float_matrix_constant(fx2x3_t a) { @@ -817,7 +817,7 @@ void multiply_compound_float_matrix_constant(fx2x3_t a) { // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> , [[MAT]] +// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> splat (i32 5), [[MAT]] // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // @@ -831,7 +831,7 @@ void multiply_int_matrix_constant(ix9x3_t a) { // CHECK-NEXT: store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], +// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], splat (i32 5) // CHECK-NEXT: store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // @@ -939,7 +939,7 @@ void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) { // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} -// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], +// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00) // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/CodeGen/neon-immediate-ubsan.c b/clang/test/CodeGen/neon-immediate-ubsan.c index ffe94d4f346fd9..9e914995373ffe 100644 --- a/clang/test/CodeGen/neon-immediate-ubsan.c +++ b/clang/test/CodeGen/neon-immediate-ubsan.c @@ -21,6 +21,6 @@ int32x2_t test_vqrshrn_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vqrshrn_n_s64 // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> {{.*}}, i32 1) - // CHECK-ARMV7: call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> {{.*}}, <2 x i64> ) + // CHECK-ARMV7: call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> {{.*}}, <2 x i64> splat (i64 -1)) return vqrshrn_n_s64(a, 0 + 1); } diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c index da7cdff0b1a46b..d9b34c8e383f99 100644 --- a/clang/test/CodeGen/nofpclass.c +++ b/clang/test/CodeGen/nofpclass.c @@ -103,7 +103,7 @@ float defined_func_f32(float a, float b, float c) { // CFINITEONLY-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16 // CFINITEONLY-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16 // CFINITEONLY-NEXT: [[TMP3:%.*]] = call nnan ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -// CFINITEONLY-NEXT: [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP3]], +// CFINITEONLY-NEXT: [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP3]], splat (double 4.000000e+00) // CFINITEONLY-NEXT: ret <2 x double> [[ADD]] // // CLFINITEONLY: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) @@ -111,7 +111,7 @@ float defined_func_f32(float a, float b, float c) { // CLFINITEONLY-SAME: (<2 x double> noundef nofpclass(nan inf) [[A:%.*]], <2 x double> noundef nofpclass(nan inf) [[B:%.*]], <2 x double> noundef nofpclass(nan inf) [[C:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // CLFINITEONLY-NEXT: entry: // CLFINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[A]], <2 x double> [[B]], <2 x double> [[C]]) -// CLFINITEONLY-NEXT: [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP0]], +// CLFINITEONLY-NEXT: [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP0]], splat (double 4.000000e+00) // CLFINITEONLY-NEXT: ret <2 x double> [[ADD]] // // NONANS: Function Attrs: noinline nounwind optnone @@ -128,7 +128,7 @@ float defined_func_f32(float a, float b, float c) { // NONANS-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16 // NONANS-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16 // NONANS-NEXT: [[TMP3:%.*]] = call nnan <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -// NONANS-NEXT: [[ADD:%.*]] = fadd nnan <2 x double> [[TMP3]], +// NONANS-NEXT: [[ADD:%.*]] = fadd nnan <2 x double> [[TMP3]], splat (double 4.000000e+00) // NONANS-NEXT: ret <2 x double> [[ADD]] // // NOINFS: Function Attrs: noinline nounwind optnone @@ -145,7 +145,7 @@ float defined_func_f32(float a, float b, float c) { // NOINFS-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16 // NOINFS-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16 // NOINFS-NEXT: [[TMP3:%.*]] = call ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) -// NOINFS-NEXT: [[ADD:%.*]] = fadd ninf <2 x double> [[TMP3]], +// NOINFS-NEXT: [[ADD:%.*]] = fadd ninf <2 x double> [[TMP3]], splat (double 4.000000e+00) // NOINFS-NEXT: ret <2 x double> [[ADD]] // double2 defined_func_v2f64(double2 a, double2 b, double2 c) { diff --git a/clang/test/CodeGen/ppc-vec_ct-truncate.c b/clang/test/CodeGen/ppc-vec_ct-truncate.c index 42542edfad71c9..e3a0f11618759c 100644 --- a/clang/test/CodeGen/ppc-vec_ct-truncate.c +++ b/clang/test/CodeGen/ppc-vec_ct-truncate.c @@ -30,11 +30,11 @@ void test(void) { res_vsi = vec_cts(a1, 31); // CHECK: [[TMP0:%.*]] = load <2 x double>, ptr @a1, align 16 - // CHECK-NEXT: fmul <2 x double> [[TMP0]], + // CHECK-NEXT: fmul <2 x double> [[TMP0]], splat (double 0x41E0000000000000) res_vsi = vec_cts(a1, 500); // CHECK: [[TMP4:%.*]] = load <2 x double>, ptr @a1, align 16 - // CHECK-NEXT: fmul <2 x double> [[TMP4]], + // CHECK-NEXT: fmul <2 x double> [[TMP4]], splat (double 0x4130000000000000) res_vsi = vec_ctu(vf1, 31); // CHECK: [[TMP8:%.*]] = load <4 x float>, ptr @vf1, align 16 @@ -46,19 +46,19 @@ void test(void) { res_vull = vec_ctul(vf1, 31); // CHECK: [[TMP12:%.*]] = load <4 x float>, ptr @vf1, align 16 - // CHECK-NEXT: fmul <4 x float> [[TMP12]], + // CHECK-NEXT: fmul <4 x float> [[TMP12]], splat (float 0x41E0000000000000) res_vull = vec_ctul(vf1, 500); // CHECK: [[TMP21:%.*]] = load <4 x float>, ptr @vf1, align 16 - // CHECK-NEXT: fmul <4 x float> [[TMP21]], + // CHECK-NEXT: fmul <4 x float> [[TMP21]], splat (float 0x4130000000000000) res_vsll = vec_ctsl(vf1, 31); // CHECK: [[TMP30:%.*]] = load <4 x float>, ptr @vf1, align 16 - // CHECK-NEXT: fmul <4 x float> [[TMP30]], + // CHECK-NEXT: fmul <4 x float> [[TMP30]], splat (float 0x41E0000000000000) res_vsll = vec_ctsl(vf1, 500); // CHECK: [[TMP39:%.*]] = load <4 x float>, ptr @vf1, align 16 - // CHECK-NEXT: fmul <4 x float> [[TMP39]], + // CHECK-NEXT: fmul <4 x float> [[TMP39]], splat (float 0x4130000000000000) res_vf = vec_ctf(vsi1, 31); // CHECK: [[TMP48:%.*]] = load <4 x i32>, ptr @vsi1, align 16 @@ -71,10 +71,10 @@ void test(void) { res_vd = vec_ctd(vsi1, 31); // CHECK: [[TMP53:%.*]] = load <4 x i32>, ptr @vsi1, align 16 // CHECK: [[TMP83:%.*]] = call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> [[TMP82:%.*]]) - // CHECK-NEXT: fmul <2 x double> [[TMP83]], + // CHECK-NEXT: fmul <2 x double> [[TMP83]], splat (double 0x3E00000000000000) res_vd = vec_ctd(vsi1, 500); // CHECK: [[TMP84:%.*]] = load <4 x i32>, ptr @vsi1, align 16 // CHECK: [[TMP115:%.*]] = call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> [[TMP114:%.*]]) - // CHECK-NEXT: fmul <2 x double> [[TMP115]], + // CHECK-NEXT: fmul <2 x double> [[TMP115]], splat (double 0x3EB0000000000000) } diff --git a/clang/test/CodeGen/variadic-nvptx.c b/clang/test/CodeGen/variadic-nvptx.c index 4e4fc5ecdef65e..4c9776342c03e3 100644 --- a/clang/test/CodeGen/variadic-nvptx.c +++ b/clang/test/CodeGen/variadic-nvptx.c @@ -39,7 +39,7 @@ extern void varargs_simple(int, ...); // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[A]], i32 0, i32 2 // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 // CHECK-NEXT: call void (i32, ...) @varargs_simple(i32 noundef 0, i32 [[TMP7]], i8 [[TMP9]], i32 [[TMP11]]) #[[ATTR3]] -// CHECK-NEXT: store <4 x i32> , ptr [[V]], align 16 +// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[V]], align 16 // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, ptr [[V]], align 16 // CHECK-NEXT: call void (i32, ...) @varargs_simple(i32 noundef 0, <4 x i32> noundef [[TMP12]]) #[[ATTR3]] // CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0]], ptr [[T]], i32 0, i32 0 diff --git a/clang/test/CodeGen/vecshift.c b/clang/test/CodeGen/vecshift.c index 3cb772a6feb404..23cb0225f11e22 100644 --- a/clang/test/CodeGen/vecshift.c +++ b/clang/test/CodeGen/vecshift.c @@ -51,22 +51,22 @@ vector_uint4 vui4; void foo(void) { vc8 = 1 << vc8; // CHECK: [[t0:%.+]] = load <8 x i8>, ptr {{@.+}}, -// CHECK: shl <8 x i8> , [[t0]] +// CHECK: shl <8 x i8> splat (i8 1), [[t0]] vuc8 = 1 << vuc8; // CHECK: [[t1:%.+]] = load <8 x i8>, ptr {{@.+}}, -// CHECK: shl <8 x i8> , [[t1]] +// CHECK: shl <8 x i8> splat (i8 1), [[t1]] vi8 = 1 << vi8; // CHECK: [[t2:%.+]] = load <8 x i32>, ptr {{@.+}}, -// CHECK: shl <8 x i32> , [[t2]] +// CHECK: shl <8 x i32> splat (i32 1), [[t2]] vui8 = 1 << vui8; // CHECK: [[t3:%.+]] = load <8 x i32>, ptr {{@.+}}, -// CHECK: shl <8 x i32> , [[t3]] +// CHECK: shl <8 x i32> splat (i32 1), [[t3]] vs8 = 1 << vs8; // CHECK: [[t4:%.+]] = load <8 x i16>, ptr {{@.+}}, -// CHECK: shl <8 x i16> , [[t4]] +// CHECK: shl <8 x i16> splat (i16 1), [[t4]] vus8 = 1 << vus8; // CHECK: [[t5:%.+]] = load <8 x i16>, ptr {{@.+}}, -// CHECK: shl <8 x i16> , [[t5]] +// CHECK: shl <8 x i16> splat (i16 1), [[t5]] vc8 = c << vc8; // CHECK: [[t6:%.+]] = load i8, ptr @c, @@ -104,7 +104,7 @@ void foo(void) { // CHECK: shl <8 x i16> [[splat_splat20]], [[t15]] vus8 = 1 << vus8; // CHECK: [[t16:%.+]] = load <8 x i16>, ptr {{@.+}}, -// CHECK: [[shl22:%.+]] = shl <8 x i16> , [[t16]] +// CHECK: [[shl22:%.+]] = shl <8 x i16> splat (i16 1), [[t16]] vc8 = vc8 << vc8; // CHECK: [[t17:%.+]] = load <8 x i8>, ptr {{@.+}}, diff --git a/clang/test/CodeGen/vector-scalar.c b/clang/test/CodeGen/vector-scalar.c index 0c973cd41ee44b..f115bae0f75830 100644 --- a/clang/test/CodeGen/vector-scalar.c +++ b/clang/test/CodeGen/vector-scalar.c @@ -5,28 +5,28 @@ typedef unsigned char uchar4 __attribute__ ((vector_size (4))); // CHECK: @add2 -// CHECK: add <4 x i8> {{.*}}, +// CHECK: add <4 x i8> {{.*}}, splat (i8 2) uchar4 add2(uchar4 v) { return v + 2; } // CHECK: @sub2 -// CHECK: sub <4 x i8> {{.*}}, +// CHECK: sub <4 x i8> {{.*}}, splat (i8 2) uchar4 sub2(uchar4 v) { return v - 2; } // CHECK: @mul2 -// CHECK: mul <4 x i8> {{.*}}, +// CHECK: mul <4 x i8> {{.*}}, splat (i8 2) uchar4 mul2(uchar4 v) { return v * 2; } // CHECK: @div2 -// CHECK: udiv <4 x i8> {{.*}}, +// CHECK: udiv <4 x i8> {{.*}}, splat (i8 2) uchar4 div2(uchar4 v) { return v / 2; @@ -35,7 +35,7 @@ uchar4 div2(uchar4 v) typedef __attribute__(( ext_vector_type(4) )) unsigned char uchar4_ext; // CHECK: @div3_ext -// CHECK: udiv <4 x i8> %{{.*}}, +// CHECK: udiv <4 x i8> %{{.*}}, splat (i8 3) uchar4_ext div3_ext(uchar4_ext v) { return v / 3; diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp index c92a970b42f60f..7c323c2d368c0e 100644 --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -1581,7 +1581,7 @@ TEST_UNINIT(intvec16, int __attribute__((vector_size(16)))); // CHECK: %uninit = alloca <4 x i32>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_intvec16_uninit() -// PATTERN: store <4 x i32> , ptr %uninit, align 16, !annotation [[AUTO_INIT]] +// PATTERN: store <4 x i32> splat (i32 [[I32]]), ptr %uninit, align 16, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_intvec16_uninit() // ZERO: store <4 x i32> zeroinitializer, ptr %uninit, align 16, !annotation [[AUTO_INIT]] @@ -1595,7 +1595,7 @@ TEST_BRACES(intvec16, int __attribute__((vector_size(16)))); TEST_CUSTOM(intvec16, int __attribute__((vector_size(16))), { 0x44444444, 0x44444444, 0x44444444, 0x44444444 }); // CHECK-LABEL: @test_intvec16_custom() // CHECK: %custom = alloca <4 x i32>, align [[ALIGN:[0-9]*]] -// CHECK-NEXT: store <4 x i32> , ptr %custom, align [[ALIGN]] +// CHECK-NEXT: store <4 x i32> splat (i32 1145324612), ptr %custom, align [[ALIGN]] // CHECK-NOT: !annotation // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) @@ -1604,7 +1604,7 @@ TEST_UNINIT(longlongvec32, long long __attribute__((vector_size(32)))); // CHECK: %uninit = alloca <4 x i64>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_longlongvec32_uninit() -// PATTERN: store <4 x i64> , ptr %uninit, align 32, !annotation [[AUTO_INIT]] +// PATTERN: store <4 x i64> splat (i64 [[I64]]), ptr %uninit, align 32, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_longlongvec32_uninit() // ZERO: store <4 x i64> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]] @@ -1618,7 +1618,7 @@ TEST_BRACES(longlongvec32, long long __attribute__((vector_size(32)))); TEST_CUSTOM(longlongvec32, long long __attribute__((vector_size(32))), { 0x3333333333333333, 0x3333333333333333, 0x3333333333333333, 0x3333333333333333 }); // CHECK-LABEL: @test_longlongvec32_custom() // CHECK: %custom = alloca <4 x i64>, align [[ALIGN:[0-9]*]] -// CHECK-NEXT: store <4 x i64> , ptr %custom, align [[ALIGN]] +// CHECK-NEXT: store <4 x i64> splat (i64 3689348814741910323), ptr %custom, align [[ALIGN]] // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) TEST_UNINIT(floatvec16, float __attribute__((vector_size(16)))); @@ -1626,7 +1626,7 @@ TEST_UNINIT(floatvec16, float __attribute__((vector_size(16)))); // CHECK: %uninit = alloca <4 x float>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_floatvec16_uninit() -// PATTERN: store <4 x float> , ptr %uninit, align 16, !annotation [[AUTO_INIT]] +// PATTERN: store <4 x float> splat (float 0xFFFFFFFFE0000000), ptr %uninit, align 16, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_floatvec16_uninit() // ZERO: store <4 x float> zeroinitializer, ptr %uninit, align 16, !annotation [[AUTO_INIT]] @@ -1640,7 +1640,7 @@ TEST_BRACES(floatvec16, float __attribute__((vector_size(16)))); TEST_CUSTOM(floatvec16, float __attribute__((vector_size(16))), { 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433 }); // CHECK-LABEL: @test_floatvec16_custom() // CHECK: %custom = alloca <4 x float>, align [[ALIGN:[0-9]*]] -// CHECK-NEXT: store <4 x float> , ptr %custom, align [[ALIGN]] +// CHECK-NEXT: store <4 x float> splat (float 0x400921FB60000000), ptr %custom, align [[ALIGN]] // CHECK-NOT: !annotation // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) @@ -1649,7 +1649,7 @@ TEST_UNINIT(doublevec32, double __attribute__((vector_size(32)))); // CHECK: %uninit = alloca <4 x double>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_doublevec32_uninit() -// PATTERN: store <4 x double> , ptr %uninit, align 32, !annotation [[AUTO_INIT]] +// PATTERN: store <4 x double> splat (double 0xFFFFFFFFFFFFFFFF), ptr %uninit, align 32, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_doublevec32_uninit() // ZERO: store <4 x double> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]] @@ -1663,7 +1663,7 @@ TEST_BRACES(doublevec32, double __attribute__((vector_size(32)))); TEST_CUSTOM(doublevec32, double __attribute__((vector_size(32))), { 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433 }); // CHECK-LABEL: @test_doublevec32_custom() // CHECK: %custom = alloca <4 x double>, align [[ALIGN:[0-9]*]] -// CHECK-NEXT: store <4 x double> , ptr %custom, align [[ALIGN]] +// CHECK-NEXT: store <4 x double> splat (double 0x400921FB54442D18), ptr %custom, align [[ALIGN]] // CHECK-NOT: !annotation // CHECK-NEXT: call void @{{.*}}used{{.*}}%custom) @@ -1673,7 +1673,7 @@ TEST_UNINIT(doublevec24, double __attribute__((vector_size(24)))); // CHECK: %uninit = alloca <3 x double>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_doublevec24_uninit() -// PATTERN: store <3 x double> , ptr %uninit, align 32, !annotation [[AUTO_INIT]] +// PATTERN: store <3 x double> splat (double 0xFFFFFFFFFFFFFFFF), ptr %uninit, align 32, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_doublevec24_uninit() // ZERO: store <3 x double> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]] @@ -1683,7 +1683,7 @@ TEST_UNINIT(longdoublevec32, long double __attribute__((vector_size(sizeof(long // CHECK: %uninit = alloca <2 x x86_fp80>, align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_longdoublevec32_uninit() -// PATTERN: store <2 x x86_fp80> , ptr %uninit, align 32, !annotation [[AUTO_INIT]] +// PATTERN: store <2 x x86_fp80> splat (x86_fp80 0xKFFFFFFFFFFFFFFFFFFFF), ptr %uninit, align 32, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_longdoublevec32_uninit() // ZERO: store <2 x x86_fp80> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]] diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp index a098f5a9057c0e..97b5d6ce16b880 100644 --- a/clang/test/CodeGenCXX/ext-int.cpp +++ b/clang/test/CodeGenCXX/ext-int.cpp @@ -450,38 +450,38 @@ void ShiftBitIntByConstant(uint16_t4 Ext) { // LIN32: define dso_local void @_Z21ShiftBitIntByConstantDv4_DU16_(i64 % // WIN: define dso_local void @"?ShiftBitIntByConstant@@YAXT?$__vector@U?$_UBitInt@$0BA@@__clang@@$03@__clang@@@Z"(<4 x i16> Ext << 7; - // CHECK: shl <4 x i16> %{{.+}}, + // CHECK: shl <4 x i16> %{{.+}}, splat (i16 7) Ext >> 7; - // CHECK: lshr <4 x i16> %{{.+}}, + // CHECK: lshr <4 x i16> %{{.+}}, splat (i16 7) Ext << -7; - // CHECK: shl <4 x i16> %{{.+}}, + // CHECK: shl <4 x i16> %{{.+}}, splat (i16 -7) Ext >> -7; - // CHECK: lshr <4 x i16> %{{.+}}, + // CHECK: lshr <4 x i16> %{{.+}}, splat (i16 -7) // UB in C/C++, Defined in OpenCL. Ext << 29; - // CHECK: shl <4 x i16> %{{.+}}, + // CHECK: shl <4 x i16> %{{.+}}, splat (i16 29) Ext >> 29; - // CHECK: lshr <4 x i16> %{{.+}}, + // CHECK: lshr <4 x i16> %{{.+}}, splat (i16 29) } void ShiftBitIntByConstant(vint32_t8 Ext) { // LIN64: define{{.*}} void @_Z21ShiftBitIntByConstantDv8_DB32_(ptr byval(<8 x i32>) align 32 % // LIN32: define dso_local void @_Z21ShiftBitIntByConstantDv8_DB32_(<8 x i32> % // WIN: define dso_local void @"?ShiftBitIntByConstant@@YAXT?$__vector@U?$_BitInt@$0CA@@__clang@@$07@__clang@@@Z"(<8 x i32> Ext << 7; - // CHECK: shl <8 x i32> %{{.+}}, + // CHECK: shl <8 x i32> %{{.+}}, splat (i32 7) Ext >> 7; - // CHECK: ashr <8 x i32> %{{.+}}, + // CHECK: ashr <8 x i32> %{{.+}}, splat (i32 7) Ext << -7; - // CHECK: shl <8 x i32> %{{.+}}, + // CHECK: shl <8 x i32> %{{.+}}, splat (i32 -7) Ext >> -7; - // CHECK: ashr <8 x i32> %{{.+}}, + // CHECK: ashr <8 x i32> %{{.+}}, splat (i32 -7) // UB in C/C++, Defined in OpenCL. Ext << 29; - // CHECK: shl <8 x i32> %{{.+}}, + // CHECK: shl <8 x i32> %{{.+}}, splat (i32 29) Ext >> 29; - // CHECK: ashr <8 x i32> %{{.+}}, + // CHECK: ashr <8 x i32> %{{.+}}, splat (i32 29) } void ConstantShiftByBitInt(_BitInt(28) Ext, _BitInt(65) LargeExt) { diff --git a/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp b/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp index 139feab30cb2ff..4504000856f23a 100644 --- a/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp +++ b/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp @@ -41,7 +41,7 @@ void TwoVectorOps() { // CHECK: [[RHS:%.+]] = load <2 x i32> // CHECK: [[NEG:%.+]] = icmp slt <2 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <2 x i1> [[NEG]] to <2 x i32> - // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_AND:%.+]] = and <2 x i32> [[RHS]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <2 x i32> [[LHS]], [[SEXT]] // CHECK: = or <2 x i32> [[RHS_AND]], [[LHS_AND]] @@ -52,7 +52,7 @@ void TwoVectorOps() { // CHECK: [[RHS:%.+]] = load <2 x float> // CHECK: [[NEG:%.+]] = icmp slt <2 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <2 x i1> [[NEG]] to <2 x i32> - // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_EXT:%.+]] = bitcast <2 x float> [[RHS]] to <2 x i32> // CHECK: [[LHS_EXT:%.+]] = bitcast <2 x float> [[LHS]] to <2 x i32> // CHECK: [[RHS_AND:%.+]] = and <2 x i32> [[RHS_EXT]], [[XOR]] @@ -66,7 +66,7 @@ void TwoVectorOps() { // CHECK: [[RHS:%.+]] = load <2 x double> // CHECK: [[NEG:%.+]] = icmp slt <2 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <2 x i1> [[NEG]] to <2 x i64> - // CHECK: [[XOR:%.+]] = xor <2 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <2 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[RHS_EXT:%.+]] = bitcast <2 x double> [[RHS]] to <2 x i64> // CHECK: [[LHS_EXT:%.+]] = bitcast <2 x double> [[LHS]] to <2 x i64> // CHECK: [[RHS_AND:%.+]] = and <2 x i64> [[RHS_EXT]], [[XOR]] @@ -87,7 +87,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i16> [[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i16> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i16> - // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], splat (i16 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i16> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i16> [[LHS_SPLAT]], [[SEXT]] // CHECK: = or <4 x i16> [[RHS_AND]], [[LHS_AND]] @@ -102,7 +102,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i16> [[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i16> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i16> - // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], splat (i16 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i16> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i16> [[LHS_SPLAT]], [[SEXT]] // CHECK: = or <4 x i16> [[RHS_AND]], [[LHS_AND]] @@ -119,7 +119,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS_SPLAT]], [[SEXT]] // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]] @@ -135,7 +135,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x float> [[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_CAST:%.+]] = bitcast <4 x float> [[RHS_SPLAT]] to <4 x i32> // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x float> [[LHS_SPLAT]] to <4 x i32> // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_CAST]], [[XOR]] @@ -153,7 +153,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x double> [[RHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[RHS_CAST:%.+]] = bitcast <4 x double> [[RHS_SPLAT]] to <4 x i64> // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x double> [[LHS_SPLAT]] to <4 x i64> // CHECK: [[RHS_AND:%.+]] = and <4 x i64> [[RHS_CAST]], [[XOR]] @@ -171,7 +171,7 @@ void TwoScalarOps() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS_SPLAT]], [[SEXT]] // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]] @@ -187,7 +187,7 @@ void OneScalarOp() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS]], [[SEXT]] // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]] @@ -197,8 +197,8 @@ void OneScalarOp() { // CHECK: [[LHS:%.+]] = load <4 x i32> // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], - // CHECK: [[RHS_AND:%.+]] = and <4 x i32> , [[XOR]] + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) + // CHECK: [[RHS_AND:%.+]] = and <4 x i32> splat (i32 5), [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS]], [[SEXT]] // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]] @@ -210,7 +210,7 @@ void OneScalarOp() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x float> [[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32> - // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1) // CHECK: [[RHS_CAST:%.+]] = bitcast <4 x float> [[RHS_SPLAT]] to <4 x i32> // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x float> [[LHS]] to <4 x i32> // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_CAST]], [[XOR]] @@ -222,9 +222,9 @@ void OneScalarOp() { // CHECK: [[LHS:%.+]] = load <4 x double> // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x double> [[LHS]] to <4 x i64> - // CHECK: [[RHS_AND:%.+]] = and <4 x i64> , [[XOR]] + // CHECK: [[RHS_AND:%.+]] = and <4 x i64> splat (i64 4618441417868443648), [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS_CAST]], [[SEXT]] // CHECK: = or <4 x i64> [[RHS_AND]], [[LHS_AND]] @@ -233,8 +233,8 @@ void OneScalarOp() { // CHECK: [[LHS:%.+]] = load <4 x i64> // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], - // CHECK: [[RHS_AND:%.+]] = and <4 x i64> , [[XOR]] + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) + // CHECK: [[RHS_AND:%.+]] = and <4 x i64> splat (i64 6), [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS]], [[SEXT]] // CHECK: [[OR:%.+]] = or <4 x i64> [[RHS_AND]], [[LHS_AND]] @@ -247,7 +247,7 @@ void OneScalarOp() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i64> [[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i64> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS]], [[SEXT]] // CHECK: [[OR:%.+]] = or <4 x i64> [[RHS_AND]], [[LHS_AND]] @@ -260,7 +260,7 @@ void OneScalarOp() { // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i64> [[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64> - // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], + // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1) // CHECK: [[RHS_AND:%.+]] = and <4 x i64> [[RHS_SPLAT]], [[XOR]] // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS]], [[SEXT]] // CHECK: [[OR:%.+]] = or <4 x i64> [[RHS_AND]], [[LHS_AND]] diff --git a/clang/test/CodeGenCXX/matrix-type-builtins.cpp b/clang/test/CodeGenCXX/matrix-type-builtins.cpp index 164a220de26d4d..a7fde8719c128a 100644 --- a/clang/test/CodeGenCXX/matrix-type-builtins.cpp +++ b/clang/test/CodeGenCXX/matrix-type-builtins.cpp @@ -57,7 +57,7 @@ void test_transpose_rvalue() { // CHECK-NEXT: entry: // CHECK-NEXT: [[M_T_ADDR:%.*]] = alloca [9 x float], align 4 // CHECK-NEXT: [[CALL_RES:%.*]] = call noundef <9 x float> @_Z10get_matrixv() - // CHECK-NEXT: [[ADD:%.*]] = fadd <9 x float> [[CALL_RES]], + // CHECK-NEXT: [[ADD:%.*]] = fadd <9 x float> [[CALL_RES]], splat (float 2.000000e+00) // CHECK-NEXT: [[M_T:%.*]] = call <9 x float> @llvm.matrix.transpose.v9f32(<9 x float> [[ADD]], i32 3, i32 3) // CHECK-NEXT: store <9 x float> [[M_T]], ptr [[M_T_ADDR]], align 4 matrix_t m_t = __builtin_matrix_transpose(get_matrix() + 2.0); diff --git a/clang/test/CodeGenCXX/matrix-type-operators.cpp b/clang/test/CodeGenCXX/matrix-type-operators.cpp index 8974d2b2600292..8854a718fb13ec 100644 --- a/clang/test/CodeGenCXX/matrix-type-operators.cpp +++ b/clang/test/CodeGenCXX/matrix-type-operators.cpp @@ -407,7 +407,7 @@ void test_constexpr2(matrix_type &m) { // NOOPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}} // OPT: [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]] - // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], + // CHECK-NEXT: [[SUB2:%.*]] = add <25 x i32> [[SUB]], splat (i32 1) // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}} // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}} // CHECK-NEXT: store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4 diff --git a/clang/test/CodeGenCXX/vector-size-conditional.cpp b/clang/test/CodeGenCXX/vector-size-conditional.cpp index fcfaa76d8b8c45..033847cbb083ad 100644 --- a/clang/test/CodeGenCXX/vector-size-conditional.cpp +++ b/clang/test/CodeGenCXX/vector-size-conditional.cpp @@ -146,7 +146,7 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load <4 x i32> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> splat (i32 5) four_ints ?: some_float; // CHECK: %[[COND:.+]] = load <4 x i32> @@ -161,7 +161,7 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i32> // CHECK: %[[LHS:.+]] = load <4 x i32> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> splat (i32 5) four_ints ? some_float : four_ints; // CHECK: %[[COND:.+]] = load <4 x i32> @@ -186,19 +186,19 @@ void OneScalarOp() { // CHECK: %[[COND:.+]] = load <4 x i64> // CHECK: %[[LHS:.+]] = load <4 x double> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x double> %[[LHS]], <4 x double> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x double> %[[LHS]], <4 x double> splat (double 6.{{.+}}) four_ll ? four_ll : 6.0; // CHECK: %[[COND:.+]] = load <4 x i64> // CHECK: %[[LHS:.+]] = load <4 x i64> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> splat (i64 6) four_ll ? four_ll : 6; // CHECK: %[[COND:.+]] = load <4 x i64> // CHECK: %[[LHS:.+]] = load <4 x i64> // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer - // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> + // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> splat (i64 6) four_ll ? four_ll : some_int; // CHECK: %[[COND:.+]] = load <4 x i64> diff --git a/clang/test/CodeGenCXX/vector-splat-conversion.cpp b/clang/test/CodeGenCXX/vector-splat-conversion.cpp index 08b2fa8e96cb44..95c744043c0d8e 100644 --- a/clang/test/CodeGenCXX/vector-splat-conversion.cpp +++ b/clang/test/CodeGenCXX/vector-splat-conversion.cpp @@ -22,28 +22,28 @@ typedef __attribute__((__ext_vector_type__(4))) __int128 bigint4; // CHECK-LABEL: define{{.*}} void @_Z14BoolConversionv void BoolConversion() { - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) int4 intsT = (int4)true; // CHECK: store <4 x i32> zeroinitializer int4 intsF = (int4)false; - // CHECK: store <4 x float> + // CHECK: store <4 x float> splat (float -1.000000e+00) float4 floatsT = (float4)true; // CHECK: store <4 x float> zeroinitializer float4 floatsF = (float4)false; - // CHECK: store <4 x i128> + // CHECK: store <4 x i128> splat (i128 -1) bigint4 bigintsT = (bigint4)true; // CHECK: store <4 x i128> zeroinitializer bigint4 bigintsF = (bigint4)false; - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) constexpr int4 cIntsT = (int4)true; // CHECK: store <4 x i32> zeroinitializer constexpr int4 cIntsF = (int4)false; - // CHECK: store <4 x float> + // CHECK: store <4 x float> splat (float -1.000000e+00) constexpr float4 cFloatsT = (float4)true; // CHECK: store <4 x float> zeroinitializer constexpr float4 cFloatsF = (float4)false; - // CHECK: store <4 x i128> + // CHECK: store <4 x i128> splat (i128 -1) constexpr bigint4 cBigintsT = (bigint4)true; // CHECK: store <4 x i128> zeroinitializer constexpr bigint4 cBigintsF = (bigint4)false; diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl index 6478ea67e32a0d..dd7dfd17697037 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl @@ -3,7 +3,7 @@ // CHECK-LABEL: f3_to_d4 // CHECK: [[f3:%.*]] = alloca <3 x float> // CHECK: [[d4:%.*]] = alloca <4 x double> -// CHECK: store <3 x float> , ptr [[f3]] +// CHECK: store <3 x float> splat (float 1.000000e+00), ptr [[f3]] // CHECK: [[vecf3:%.*]] = load <3 x float>, ptr [[f3]] // CHECK: [[vecf4:%.*]] = shufflevector <3 x float> [[vecf3]], <3 x float> poison, <4 x i32> // CHECK: [[vecd4:%.*]] = fpext <4 x float> [[vecf4]] to <4 x double> @@ -16,7 +16,7 @@ void f3_to_d4() { // CHECK-LABEL: f3_to_f2 // CHECK: [[f3:%.*]] = alloca <3 x float> // CHECK: [[f2:%.*]] = alloca <2 x float> -// CHECK: store <3 x float> , ptr [[f3]] +// CHECK: store <3 x float> splat (float 2.000000e+00), ptr [[f3]] // CHECK: [[vecf3:%.*]] = load <3 x float>, ptr [[f3]] // CHECK: [[vecf2:%.*]] = shufflevector <3 x float> [[vecf3]], <3 x float> poison, <2 x i32> // CHECK: store <2 x float> [[vecf2]], ptr [[f2]] @@ -28,7 +28,7 @@ void f3_to_f2() { // CHECK-LABEL: d4_to_f2 // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[f2:%.*]] = alloca <2 x float> -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 3.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[vecf4:%.*]] = fptrunc <4 x double> [[vecd4]] to <4 x float> // CHECK: [[vecf2:%.*]] = shufflevector <4 x float> [[vecf4]], <4 x float> poison, <2 x i32> @@ -41,7 +41,7 @@ void d4_to_f2() { // CHECK-LABEL: f2_to_i2 // CHECK: [[f2:%.*]] = alloca <2 x float> // CHECK: [[i2:%.*]] = alloca <2 x i32> -// CHECK: store <2 x float> , ptr [[f2]] +// CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]] // CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]] // CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32> // CHECK: store <2 x i32> [[veci2]], ptr [[i2]] @@ -53,7 +53,7 @@ void f2_to_i2() { // CHECK-LABEL: d4_to_i2 // CHECK: [[f4:%.*]] = alloca <4 x double> // CHECK: [[i2:%.*]] = alloca <2 x i32> -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32> // CHECK: [[veci2:%.*]] = shufflevector <4 x i32> [[veci4]], <4 x i32> poison, <2 x i32> @@ -66,7 +66,7 @@ void d4_to_i2() { // CHECK-LABEL: d4_to_l4 // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[l4:%.*]] = alloca <4 x i64> -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 6.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[vecl4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i64> // CHECK: store <4 x i64> [[vecl4]], ptr [[l4]] @@ -79,7 +79,7 @@ void d4_to_l4() { // CHECK-LABEL: l4_to_i2 // CHECK: [[l4:%.*]] = alloca <4 x i64> // CHECK: [[i2:%.*]] = alloca <2 x i32> -// CHECK: store <4 x i64> , ptr [[l4]] +// CHECK: store <4 x i64> splat (i64 7), ptr [[l4]] // CHECK: [[vecl4:%.*]] = load <4 x i64>, ptr [[l4]] // CHECK: [[veci4:%.*]] = trunc <4 x i64> [[vecl4]] to <4 x i32> // CHECK: [[veci2:%.*]] = shufflevector <4 x i32> [[veci4]], <4 x i32> poison, <2 x i32> @@ -92,7 +92,7 @@ void l4_to_i2() { // CHECK-LABEL: i2_to_b2 // CHECK: [[l2:%.*]] = alloca <2 x i32> // CHECK: [[b2:%.*]] = alloca i8 -// CHECK: store <2 x i32> , ptr [[i2]] +// CHECK: store <2 x i32> splat (i32 8), ptr [[i2]] // CHECK: [[veci2:%.*]] = load <2 x i32>, ptr [[i2]] // CHECK: [[vecb2:%.*]] = icmp ne <2 x i32> [[veci2]], zeroinitializer // CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecb2]], <2 x i1> poison, <8 x i32> @@ -106,7 +106,7 @@ void i2_to_b2() { // CHECK-LABEL: d4_to_b2 // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[b2:%.*]] = alloca i8 -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[vecb4:%.*]] = fcmp une <4 x double> [[vecd4]], zeroinitializer // CHECK: [[vecd2:%.*]] = shufflevector <4 x i1> [[vecb4]], <4 x i1> poison, <2 x i32> @@ -121,7 +121,7 @@ void d4_to_b2() { // CHECK-LABEL: d4_to_d1 // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[d1:%.*]] = alloca <1 x double> -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[vecd1:%.*]] = shufflevector <4 x double> [[vecd4]], <4 x double> poison, <1 x i32> zeroinitializer // CHECK: store <1 x double> [[vecd1]], ptr [[d1:%.*]], align 8 @@ -133,7 +133,7 @@ void d4_to_d1() { // CHECK-LABEL: d4_to_dScalar // CHECK: [[d4:%.*]] = alloca <4 x double> // CHECK: [[d:%.*]] = alloca double -// CHECK: store <4 x double> , ptr [[d4]] +// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] // CHECK: [[d4x:%.*]] = extractelement <4 x double> [[vecd4]], i32 0 // CHECK: store double [[d4x]], ptr [[d]] diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl index 6395ddc2fee2a2..94a95107eea69c 100644 --- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl +++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl @@ -20,7 +20,7 @@ float4 ToFourFloats(float V){ // CHECK-LABEL: FillOne // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4 -// CHECK: store <1 x i32> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x i32> splat (i32 1), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x i32>, ptr [[vec1Ptr]], align 4 // CHECK: [[vec2:%.*]] = shufflevector <1 x i32> [[vec1]], <1 x i32> poison, <2 x i32> zeroinitializer // CHECK: ret <2 x i32> [[vec2]] @@ -30,7 +30,7 @@ int2 FillOne(){ // CHECK-LABEL: FillOneUnsigned // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4 -// CHECK: store <1 x i32> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x i32> splat (i32 1), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x i32>, ptr [[vec1Ptr]], align 4 // CHECK: [[vec3:%.*]] = shufflevector <1 x i32> [[vec1]], <1 x i32> poison, <3 x i32> zeroinitializer // CHECK: ret <3 x i32> [[vec3]] @@ -40,7 +40,7 @@ uint3 FillOneUnsigned(){ // CHECK-LABEL: FillOneUnsignedLong // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i64>, align 8 -// CHECK: store <1 x i64> , ptr [[vec1Ptr]], align 8 +// CHECK: store <1 x i64> splat (i64 1), ptr [[vec1Ptr]], align 8 // CHECK: [[vec1:%.*]] = load <1 x i64>, ptr [[vec1Ptr]], align 8 // CHECK: [[vec4:%.*]] = shufflevector <1 x i64> [[vec1]], <1 x i64> poison, <4 x i32> zeroinitializer // CHECK: ret <4 x i64> [[vec4]] @@ -50,7 +50,7 @@ vector FillOneUnsignedLong(){ // CHECK-LABEL: FillTwoPointFive // CHECK: [[vec1Ptr:%.*]] = alloca <1 x double>, align 8 -// CHECK: store <1 x double> , ptr [[vec1Ptr]], align 8 +// CHECK: store <1 x double> splat (double 2.500000e+00), ptr [[vec1Ptr]], align 8 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[vec1Ptr]], align 8 // CHECK: [[vec2:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <2 x i32> zeroinitializer // CHECK: ret <2 x double> [[vec2]] @@ -60,7 +60,7 @@ double2 FillTwoPointFive(){ // CHECK-LABEL: FillOneHalf // CHECK: [[vec1Ptr:%.*]] = alloca <1 x double>, align 8 -// CHECK: store <1 x double> , ptr [[vec1Ptr]], align 8 +// CHECK: store <1 x double> splat (double 5.000000e-01), ptr [[vec1Ptr]], align 8 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[vec1Ptr]], align 8 // CHECK: [[vec3:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <3 x i32> zeroinitializer // CHECK: ret <3 x double> [[vec3]] @@ -70,7 +70,7 @@ double3 FillOneHalf(){ // CHECK-LABEL: FillTwoPointFiveFloat // CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4 -// CHECK: store <1 x float> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x float> splat (float 2.500000e+00), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4 // CHECK: [[vec4:%.*]] = shufflevector <1 x float> [[vec1]], <1 x float> poison, <4 x i32> zeroinitializer // CHECK: ret <4 x float> [[vec4]] @@ -83,7 +83,7 @@ float4 FillTwoPointFiveFloat(){ // CHECK-LABEL: FillOneHalfFloat // CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4 -// CHECK: store <1 x float> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x float> splat (float 5.000000e-01), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4 // CHECK: [[el0:%.*]] = extractelement <1 x float> [[vec1]], i32 0 // CHECK: [[vec1Splat:%.*]] = insertelement <1 x float> poison, float [[el0]], i64 0 @@ -116,7 +116,7 @@ float2 HowManyFloats(float V) { // CHECK-LABEL: AllRighty // CHECK: [[Tmp:%.*]] = alloca <1 x double>, align 8 -// CHECK: store <1 x double> , ptr [[Tmp]], align 8 +// CHECK: store <1 x double> splat (double 1.000000e+00), ptr [[Tmp]], align 8 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[Tmp]], align 8 // CHECK: [[vec3:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <3 x i32> zeroinitializer // CHECK: [[vec3f:%.*]] = fptrunc <3 x double> [[vec3]] to <3 x float> @@ -128,7 +128,7 @@ float3 AllRighty() { // CHECK-LABEL: AllRighty2 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4 -// CHECK: store <1 x float> , ptr [[vec1Ptr]], align 4 +// CHECK: store <1 x float> splat (float 1.000000e+00), ptr [[vec1Ptr]], align 4 // CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4 // CHECK: [[vec3:%.*]] = shufflevector <1 x float> [[vec1]], <1 x float> poison, <3 x i32> // CHECK: ret <3 x float> [[vec3]] diff --git a/clang/test/CodeGenHLSL/builtins/rcp.hlsl b/clang/test/CodeGenHLSL/builtins/rcp.hlsl index eb89bcc4c7c01e..83fe33406c7c89 100644 --- a/clang/test/CodeGenHLSL/builtins/rcp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/rcp.hlsl @@ -25,31 +25,31 @@ half test_rcp_half(half p0) { return rcp(p0); } // DXIL_NATIVE_HALF: define noundef <2 x half> @ // SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @ -// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> , %{{.*}} +// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> splat (half 0xH3C00), %{{.*}} // NATIVE_HALF: ret <2 x half> %hlsl.rcp // DXIL_NO_HALF: define noundef <2 x float> @ // SPIR_NO_HALF: define spir_func noundef <2 x float> @ -// NO_HALF: %hlsl.rcp = fdiv <2 x float> , %{{.*}} +// NO_HALF: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}} // NO_HALF: ret <2 x float> %hlsl.rcp half2 test_rcp_half2(half2 p0) { return rcp(p0); } // DXIL_NATIVE_HALF: define noundef <3 x half> @ // SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @ -// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> , %{{.*}} +// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> splat (half 0xH3C00), %{{.*}} // NATIVE_HALF: ret <3 x half> %hlsl.rcp // DXIL_NO_HALF: define noundef <3 x float> @ // SPIR_NO_HALF: define spir_func noundef <3 x float> @ -// NO_HALF: %hlsl.rcp = fdiv <3 x float> , %{{.*}} +// NO_HALF: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}} // NO_HALF: ret <3 x float> %hlsl.rcp half3 test_rcp_half3(half3 p0) { return rcp(p0); } // DXIL_NATIVE_HALF: define noundef <4 x half> @ // SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @ -// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> , %{{.*}} +// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> splat (half 0xH3C00), %{{.*}} // NATIVE_HALF: ret <4 x half> %hlsl.rcp // DXIL_NO_HALF: define noundef <4 x float> @ // SPIR_NO_HALF: define spir_func noundef <4 x float> @ -// NO_HALF: %hlsl.rcp = fdiv <4 x float> , %{{.*}} +// NO_HALF: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}} // NO_HALF: ret <4 x float> %hlsl.rcp half4 test_rcp_half4(half4 p0) { return rcp(p0); } @@ -61,19 +61,19 @@ float test_rcp_float(float p0) { return rcp(p0); } // DXIL_CHECK: define noundef <2 x float> @ // SPIR_CHECK: define spir_func noundef <2 x float> @ -// CHECK: %hlsl.rcp = fdiv <2 x float> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}} // CHECK: ret <2 x float> %hlsl.rcp float2 test_rcp_float2(float2 p0) { return rcp(p0); } // DXIL_CHECK: define noundef <3 x float> @ // SPIR_CHECK: define spir_func noundef <3 x float> @ -// CHECK: %hlsl.rcp = fdiv <3 x float> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}} // CHECK: ret <3 x float> %hlsl.rcp float3 test_rcp_float3(float3 p0) { return rcp(p0); } // DXIL_CHECK: define noundef <4 x float> @ // SPIR_CHECK: define spir_func noundef <4 x float> @ -// CHECK: %hlsl.rcp = fdiv <4 x float> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}} // CHECK: ret <4 x float> %hlsl.rcp float4 test_rcp_float4(float4 p0) { return rcp(p0); } @@ -85,18 +85,18 @@ double test_rcp_double(double p0) { return rcp(p0); } // DXIL_CHECK: define noundef <2 x double> @ // SPIR_CHECK: define spir_func noundef <2 x double> @ -// CHECK: %hlsl.rcp = fdiv <2 x double> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <2 x double> splat (double 1.000000e+00), %{{.*}} // CHECK: ret <2 x double> %hlsl.rcp double2 test_rcp_double2(double2 p0) { return rcp(p0); } // DXIL_CHECK: define noundef <3 x double> @ // SPIR_CHECK: define spir_func noundef <3 x double> @ -// CHECK: %hlsl.rcp = fdiv <3 x double> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <3 x double> splat (double 1.000000e+00), %{{.*}} // CHECK: ret <3 x double> %hlsl.rcp double3 test_rcp_double3(double3 p0) { return rcp(p0); } // DXIL_CHECK: define noundef <4 x double> @ // SPIR_CHECK: define spir_func noundef <4 x double> @ -// CHECK: %hlsl.rcp = fdiv <4 x double> , %{{.*}} +// CHECK: %hlsl.rcp = fdiv <4 x double> splat (double 1.000000e+00), %{{.*}} // CHECK: ret <4 x double> %hlsl.rcp double4 test_rcp_double4(double4 p0) { return rcp(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/sign.hlsl b/clang/test/CodeGenHLSL/builtins/sign.hlsl index 1cdefa815b103f..8cc910933f462b 100644 --- a/clang/test/CodeGenHLSL/builtins/sign.hlsl +++ b/clang/test/CodeGenHLSL/builtins/sign.hlsl @@ -121,17 +121,17 @@ int test_sign_uint16_t(uint16_t p0) { return sign(p0); } // NATIVE_HALF: define [[FNATTRS]] <2 x i32> @ // NATIVE_HALF: [[CMP:%.*]] = icmp eq <2 x i16> [[ARG:%.*]], zeroinitializer -// NATIVE_HALF: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> +// NATIVE_HALF: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1) int2 test_sign_uint16_t2(uint16_t2 p0) { return sign(p0); } // NATIVE_HALF: define [[FNATTRS]] <3 x i32> @ // NATIVE_HALF: [[CMP:%.*]] = icmp eq <3 x i16> [[ARG:%.*]], zeroinitializer -// NATIVE_HALF: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> +// NATIVE_HALF: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> splat (i32 1) int3 test_sign_uint16_t3(uint16_t3 p0) { return sign(p0); } // NATIVE_HALF: define [[FNATTRS]] <4 x i32> @ // NATIVE_HALF: [[CMP:%.*]] = icmp eq <4 x i16> [[ARG:%.*]], zeroinitializer -// NATIVE_HALF: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> +// NATIVE_HALF: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 1) int4 test_sign_uint16_t4(uint16_t4 p0) { return sign(p0); } #endif // __HLSL_ENABLE_16_BIT @@ -164,17 +164,17 @@ int test_sign_uint(uint p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <2 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <2 x i32> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> +// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1) int2 test_sign_uint2(uint2 p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <3 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <3 x i32> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> +// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> splat (i32 1) int3 test_sign_uint3(uint3 p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <4 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <4 x i32> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> +// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 1) int4 test_sign_uint4(uint4 p0) { return sign(p0); } @@ -206,15 +206,15 @@ int test_sign_uint64_t(uint64_t p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <2 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <2 x i64> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> +// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1) int2 test_sign_uint64_t2(uint64_t2 p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <3 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <3 x i64> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> +// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> splat (i32 1) int3 test_sign_uint64_t3(uint64_t3 p0) { return sign(p0); } // CHECK: define [[FNATTRS]] <4 x i32> @ // CHECK: [[CMP:%.*]] = icmp eq <4 x i64> [[ARG:%.*]], zeroinitializer -// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> +// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 1) int4 test_sign_uint64_t4(uint64_t4 p0) { return sign(p0); } diff --git a/clang/test/CodeGenOpenCL/bool_cast.cl b/clang/test/CodeGenOpenCL/bool_cast.cl index 8ba8c8a0176234..3cb2c0c09734c4 100644 --- a/clang/test/CodeGenOpenCL/bool_cast.cl +++ b/clang/test/CodeGenOpenCL/bool_cast.cl @@ -22,12 +22,12 @@ void kernel ker() { uchar4 vc; vc = (uchar4)true; -// CHECK: store <4 x i8> , ptr %vc, align 4 +// CHECK: store <4 x i8> splat (i8 -1), ptr %vc, align 4 unsigned char c; c = (unsigned char)true; // CHECK: store i8 1, ptr %c, align 1 float4 vf; vf = (float4)true; -// CHECK: store <4 x float> +// CHECK: store <4 x float> splat (float -1.000000e+00) } diff --git a/clang/test/CodeGenOpenCL/logical-ops.cl b/clang/test/CodeGenOpenCL/logical-ops.cl index f083a8580ee780..f3c8bfff6399d0 100644 --- a/clang/test/CodeGenOpenCL/logical-ops.cl +++ b/clang/test/CodeGenOpenCL/logical-ops.cl @@ -10,23 +10,23 @@ typedef double double4 __attribute((ext_vector_type(4))); // CHECK: floatops kernel void floatops(global int4 *out, global float4 *fout) { - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) out[0] = (float4)(1, 1, 1, 1) && 1.0f; // CHECK: store <4 x i32> zeroinitializer out[1] = (float4)(0, 0, 0, 0) && (float4)(0, 0, 0, 0); - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) out[2] = (float4)(0, 0, 0, 0) || (float4)(1, 1, 1, 1); // CHECK: store <4 x i32> zeroinitializer out[3] = (float4)(0, 0, 0, 0) || 0.0f; - // CHECK: store <4 x i32> + // CHECK: store <4 x i32> splat (i32 -1) out[4] = !(float4)(0, 0, 0, 0); // CHECK: store <4 x i32> zeroinitializer out[5] = !(float4)(1, 2, 3, 4); // CHECK: store <4 x i32> out[6] = !(float4)(0, 1, 0, 1); - // CHECK: store <4 x float> + // CHECK: store <4 x float> splat (float 1.000000e+00) fout[0] = (float4)(!0.0f); // CHECK: store <4 x float> zeroinitializer fout[1] = (float4)(!1.0f); @@ -34,23 +34,23 @@ kernel void floatops(global int4 *out, global float4 *fout) { // CHECK: doubleops kernel void doubleops(global long4 *out, global double4 *dout) { - // CHECK: store <4 x i64> + // CHECK: store <4 x i64> splat (i64 -1) out[0] = (double4)(1, 1, 1, 1) && 1.0; // CHECK: store <4 x i64> zeroinitializer out[1] = (double4)(0, 0, 0, 0) && (double4)(0, 0, 0, 0); - // CHECK: store <4 x i64> + // CHECK: store <4 x i64> splat (i64 -1) out[2] = (double4)(0, 0, 0, 0) || (double4)(1, 1, 1, 1); // CHECK: store <4 x i64> zeroinitializer out[3] = (double4)(0, 0, 0, 0) || 0.0f; - // CHECK: store <4 x i64> + // CHECK: store <4 x i64> splat (i64 -1) out[4] = !(double4)(0, 0, 0, 0); // CHECK: store <4 x i64> zeroinitializer out[5] = !(double4)(1, 2, 3, 4); // CHECK: store <4 x i64> out[6] = !(double4)(0, 1, 0, 1); - // CHECK: store <4 x double> + // CHECK: store <4 x double> splat (double 1.000000e+00) dout[0] = (double4)(!0.0f); // CHECK: store <4 x double> zeroinitializer dout[1] = (double4)(!1.0f); diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl index 5cc4e2b246003a..d5ef01d55240c5 100644 --- a/clang/test/CodeGenOpenCL/partial_initializer.cl +++ b/clang/test/CodeGenOpenCL/partial_initializer.cl @@ -21,7 +21,7 @@ StrucTy GS = {1, 2}; // CHECK: @GV1 ={{.*}} addrspace(1) global <4 x i32> , align 16 int4 GV1 = (int4)((int2)(1,2),3,4); -// CHECK: @GV2 ={{.*}} addrspace(1) global <4 x i32> , align 16 +// CHECK: @GV2 ={{.*}} addrspace(1) global <4 x i32> splat (i32 1), align 16 int4 GV2 = (int4)(1); // CHECK: @__const.f.S = private unnamed_addr addrspace(2) constant %struct.StrucTy { i32 1, i32 2, i32 0 }, align 4 @@ -57,7 +57,7 @@ void f(void) { // CHECK: store <4 x i32> %[[v7]], ptr %[[V1]], align 16 int4 V1 = (int4)((int2)(1,2),3,4); - // CHECK: store <4 x i32> , ptr %[[V2]], align 16 + // CHECK: store <4 x i32> splat (i32 1), ptr %[[V2]], align 16 int4 V2 = (int4)(1); } diff --git a/clang/test/CodeGenOpenCL/shifts.cl b/clang/test/CodeGenOpenCL/shifts.cl index 98a464b74ce58c..d08aa232cd3f8c 100644 --- a/clang/test/CodeGenOpenCL/shifts.cl +++ b/clang/test/CodeGenOpenCL/shifts.cl @@ -47,7 +47,7 @@ typedef __attribute__((ext_vector_type(4))) int int4; //OPT: @vectorVectorTest int4 vectorVectorTest(int4 a,int4 b) { - //OPT: [[VM:%.+]] = and <4 x i32> %b, + //OPT: [[VM:%.+]] = and <4 x i32> %b, splat (i32 31) //OPT-NEXT: [[VC:%.+]] = shl <4 x i32> %a, [[VM]] int4 c = a << b; //OPT-NEXT: [[VF:%.+]] = add <4 x i32> [[VC]], @@ -62,10 +62,10 @@ int4 vectorVectorTest(int4 a,int4 b) { int4 vectorScalarTest(int4 a,int b) { //NOOPT: [[SP0:%.+]] = insertelement <4 x i32> poison //NOOPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> poison, <4 x i32> zeroinitializer - //NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], + //NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], splat (i32 31) //NOOPT: [[VSC:%.+]] = shl <4 x i32> [[VSS:%.+]], [[VSM]] int4 c = a << b; - //NOOPT: [[VSF:%.+]] = shl <4 x i32> [[VSC1:%.+]], + //NOOPT: [[VSF:%.+]] = shl <4 x i32> [[VSC1:%.+]], splat (i32 2) //NOOPT: [[VSA:%.+]] = add <4 x i32> [[VSC2:%.+]], [[VSF]] int4 d = {1, 1, 1, 1}; int4 f = c + (d << 34); diff --git a/clang/test/CodeGenOpenCL/vector_literals.cl b/clang/test/CodeGenOpenCL/vector_literals.cl index f69f339ca99aaf..3bbb9e566dbcab 100644 --- a/clang/test/CodeGenOpenCL/vector_literals.cl +++ b/clang/test/CodeGenOpenCL/vector_literals.cl @@ -48,7 +48,7 @@ void vector_literals_valid() { //CHECK: shufflevector <4 x i32> , <4 x i32> %{{.+}}, <4 x i32> int4 a_1_3 = (int4)(1, (int3)(2, 3, 4)); - //CHECK: store <4 x i32> , ptr %a + //CHECK: store <4 x i32> splat (i32 1), ptr %a int4 a = (int4)(1); //CHECK: load <4 x i32>, ptr %a @@ -60,7 +60,7 @@ void vector_literals_valid() { //CHECK: shufflevector <8 x i32> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> int8 b = (int8)(1, 2, a.xy, a); - //CHECK: store <4 x float> , ptr %V2 + //CHECK: store <4 x float> splat (float 1.000000e+00), ptr %V2 float4 V2 = (float4)(1); } diff --git a/clang/test/Headers/__clang_hip_math_deprecated.hip b/clang/test/Headers/__clang_hip_math_deprecated.hip index 17b90eda20572d..caba3e9ad83d18 100644 --- a/clang/test/Headers/__clang_hip_math_deprecated.hip +++ b/clang/test/Headers/__clang_hip_math_deprecated.hip @@ -21,7 +21,7 @@ extern "C" __device__ _Float16 test_rcpf16_wrapper(_Float16 x) { // CHECK-LABEL: @test_rcp2f16_wrapper( // CHECK-NEXT: entry: -// CHECK-NEXT: [[DIV_I:%.*]] = fdiv contract <2 x half> , [[X:%.*]] +// CHECK-NEXT: [[DIV_I:%.*]] = fdiv contract <2 x half> splat (half 0xH3C00), [[X:%.*]] // CHECK-NEXT: ret <2 x half> [[DIV_I]] // extern "C" __device__ __2f16 test_rcp2f16_wrapper(__2f16 x) { diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c index 8da1d97fe13280..d27756259fa2f6 100644 --- a/clang/test/Headers/wasm.c +++ b/clang/test/Headers/wasm.c @@ -482,7 +482,7 @@ v128_t test_f64x2_const(void) { // CHECK-LABEL: @test_i8x16_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 707406378) // v128_t test_i8x16_const_splat(void) { return wasm_i8x16_const_splat(42); @@ -490,7 +490,7 @@ v128_t test_i8x16_const_splat(void) { // CHECK-LABEL: @test_u8x16_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 707406378) // v128_t test_u8x16_const_splat(void) { return wasm_u8x16_const_splat(42); @@ -498,7 +498,7 @@ v128_t test_u8x16_const_splat(void) { // CHECK-LABEL: @test_i16x8_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 2752554) // v128_t test_i16x8_const_splat(void) { return wasm_i16x8_const_splat(42); @@ -506,7 +506,7 @@ v128_t test_i16x8_const_splat(void) { // CHECK-LABEL: @test_u16x8_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 2752554) // v128_t test_u16x8_const_splat(void) { return wasm_u16x8_const_splat(42); @@ -514,7 +514,7 @@ v128_t test_u16x8_const_splat(void) { // CHECK-LABEL: @test_i32x4_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 42) // v128_t test_i32x4_const_splat(void) { return wasm_i32x4_const_splat(42); @@ -522,7 +522,7 @@ v128_t test_i32x4_const_splat(void) { // CHECK-LABEL: @test_u32x4_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 42) // v128_t test_u32x4_const_splat(void) { return wasm_u32x4_const_splat(42); @@ -546,7 +546,7 @@ v128_t test_u64x2_const_splat(void) { // CHECK-LABEL: @test_f32x4_const_splat( // CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> +// CHECK-NEXT: ret <4 x i32> splat (i32 1109917696) // v128_t test_f32x4_const_splat(void) { return wasm_f32x4_const_splat(42); @@ -1462,7 +1462,7 @@ v128_t test_f64x2_ge(v128_t a, v128_t b) { // CHECK-LABEL: @test_v128_not( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], +// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) // CHECK-NEXT: ret <4 x i32> [[NOT_I]] // v128_t test_v128_not(v128_t a) { @@ -1498,7 +1498,7 @@ v128_t test_v128_xor(v128_t a, v128_t b) { // CHECK-LABEL: @test_v128_andnot( // CHECK-NEXT: entry: -// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], +// CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1) // CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[A:%.*]], [[NOT_I]] // CHECK-NEXT: ret <4 x i32> [[AND_I]] // diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 4bc7d9e68280d5..53f2cda6d36a4f 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1689,6 +1689,23 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, if (isa(CV) || isa(CV)) { auto *CVVTy = cast(CV->getType()); Type *ETy = CVVTy->getElementType(); + + // Use the same shorthand for splat vector (i.e. "splat(Ty val)") as is + // permitted on IR input to reduce the output changes when enabling + // UseConstant{Int,FP}ForFixedLengthSplat. + // TODO: Remove this block when the UseConstant{Int,FP}ForFixedLengthSplat + // options are removed. + if (auto *SplatVal = CV->getSplatValue()) { + if (isa(SplatVal) || isa(SplatVal)) { + Out << "splat ("; + WriterCtx.TypePrinter->print(ETy, Out); + Out << ' '; + WriteAsOperandInternal(Out, SplatVal, WriterCtx); + Out << ')'; + return; + } + } + Out << '<'; WriterCtx.TypePrinter->print(ETy, Out); Out << ' '; diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll index aaffd97b92b2de..b329a5607acb97 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll @@ -70,15 +70,15 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; CHECK-LABEL: 'fneg_idiom' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half 0xH8000, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> splat (float -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F16 = fsub half -0.0, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll index 52f6f73525a3b9..303bcfa289577c 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll @@ -2091,11 +2091,11 @@ define void @extmulv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6 define void @extmul_const(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i64) { ; CHECK-LABEL: 'extmul_const' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, splat (i16 10) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, splat (i16 10) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16b = zext <8 x i8> %i8 to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, splat (i16 255) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %aal_8_16 = mul <8 x i16> %zl1_8_16b, %and ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll index 2ceaf0c6f536af..ada0be66c27b5a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/div.ll +++ b/llvm/test/Analysis/CostModel/AArch64/div.ll @@ -192,21 +192,21 @@ define i32 @sdiv_uniformconst() { ; CHECK-LABEL: 'sdiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 7 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = sdiv i128 undef, 7 @@ -238,21 +238,21 @@ define i32 @udiv_uniformconst() { ; CHECK-LABEL: 'udiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 7 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = udiv i128 undef, 7 @@ -376,21 +376,21 @@ define i32 @sdiv_uniformconstpow2() { ; CHECK-LABEL: 'sdiv_uniformconstpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = sdiv i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = sdiv i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = sdiv i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = sdiv i128 undef, 16 @@ -422,21 +422,21 @@ define i32 @udiv_uniformconstpow2() { ; CHECK-LABEL: 'udiv_uniformconstpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = udiv i128 undef, 16 @@ -560,21 +560,21 @@ define i32 @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, -16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = sdiv i128 undef, -16 @@ -606,21 +606,21 @@ define i32 @udiv_uniformconstnegpow2() { ; CHECK-LABEL: 'udiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, -16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I128 = udiv i128 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll index dfed023f0119d4..dd93aed53c0f2a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll +++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define <16 x i8> @sdiv8xi16(<16 x i8> %x) { ; CHECK-LABEL: 'sdiv8xi16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div ; %div = sdiv <16 x i8> %x, @@ -16,7 +16,7 @@ define <16 x i8> @sdiv8xi16(<16 x i8> %x) { define <8 x i16> @sdiv16xi8(<8 x i16> %x) { ; CHECK-LABEL: 'sdiv16xi8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div ; %div = sdiv <8 x i16> %x, @@ -25,7 +25,7 @@ define <8 x i16> @sdiv16xi8(<8 x i16> %x) { define <4 x i32> @sdiv32xi4(<4 x i32> %x) { ; CHECK-LABEL: 'sdiv32xi4' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div ; %div = sdiv <4 x i32> %x, @@ -34,7 +34,7 @@ define <4 x i32> @sdiv32xi4(<4 x i32> %x) { define <16 x i8> @udiv8xi16(<16 x i8> %x) { ; CHECK-LABEL: 'udiv8xi16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, splat (i8 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div ; %div = udiv <16 x i8> %x, @@ -43,7 +43,7 @@ define <16 x i8> @udiv8xi16(<16 x i8> %x) { define <8 x i16> @udiv16xi8(<8 x i16> %x) { ; CHECK-LABEL: 'udiv16xi8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, splat (i16 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div ; %div = udiv <8 x i16> %x, @@ -52,7 +52,7 @@ define <8 x i16> @udiv16xi8(<8 x i16> %x) { define <4 x i32> @udiv32xi4(<4 x i32> %x) { ; CHECK-LABEL: 'udiv32xi4' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, splat (i32 9) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div ; %div = udiv <4 x i32> %x, diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll index eab4efe0613b5d..632f26dfa53829 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll @@ -96,7 +96,7 @@ declare i19 @llvm.fshl.i19(i19, i19, i19) define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl ; entry: @@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl ; entry: @@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl ; entry: @@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll index 1876f0410c2337..a0a579ae96a9ba 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll @@ -96,7 +96,7 @@ declare i19 @llvm.fshr.i19(i19, i19, i19) define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr ; entry: @@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr ; entry: @@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr ; entry: @@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> ) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1)) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/logicalop.ll b/llvm/test/Analysis/CostModel/AArch64/logicalop.ll index ab4db969cb281a..5c71cf02741890 100644 --- a/llvm/test/Analysis/CostModel/AArch64/logicalop.ll +++ b/llvm/test/Analysis/CostModel/AArch64/logicalop.ll @@ -31,14 +31,14 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll index 6f9f64a26851a1..de13125f665ecb 100644 --- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll +++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll @@ -190,19 +190,19 @@ declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) { ; CHECK: gather_load_4xi8_constant_mask ; CHECK-NEON-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-128-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-256-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef) ; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-512-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef) ; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) @@ -235,19 +235,19 @@ declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32 immarg, <4 define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) { ; CHECK: scatter_store_4xi8_constant_mask ; CHECK-NEON-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-512-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) @@ -280,19 +280,19 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1> define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) { ; CHECK: gather_load_4xi32_constant_mask ; CHECK-NEON-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-128-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-256-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-512-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) @@ -325,19 +325,19 @@ declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, < define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs) { ; CHECK: scatter_store_4xi32_constant_mask ; CHECK-NEON-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-512-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true)) ; CHECK-SVE-512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) diff --git a/llvm/test/Analysis/CostModel/AArch64/rem.ll b/llvm/test/Analysis/CostModel/AArch64/rem.ll index 7519bf1f7c5c6f..2f1e8c8bf8dfa4 100644 --- a/llvm/test/Analysis/CostModel/AArch64/rem.ll +++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll @@ -178,21 +178,21 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; CHECK-LABEL: 'srem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -221,21 +221,21 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; CHECK-LABEL: 'urem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -350,21 +350,21 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; CHECK-LABEL: 'srem_uniformconstpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = srem i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = srem i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -393,21 +393,21 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; CHECK-LABEL: 'urem_uniformconstpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -522,21 +522,21 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -565,21 +565,21 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; CHECK-LABEL: 'urem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AMDGPU/div.ll b/llvm/test/Analysis/CostModel/AMDGPU/div.ll index 406b67b9d45940..459c41a0b3eb5b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/div.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/div.ll @@ -337,59 +337,59 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; FAST-LABEL: 'sdiv_uniformconst' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'sdiv_uniformconst' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'sdiv_uniformconst' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -418,59 +418,59 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; FAST-LABEL: 'udiv_uniformconst' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'udiv_uniformconst' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'udiv_uniformconst' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -661,59 +661,59 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; FAST-LABEL: 'sdiv_uniformconstpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'sdiv_uniformconstpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'sdiv_uniformconstpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -742,59 +742,59 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; FAST-LABEL: 'udiv_uniformconstpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'udiv_uniformconstpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'udiv_uniformconstpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -985,59 +985,59 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; FAST-LABEL: 'sdiv_uniformconstnegpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'sdiv_uniformconstnegpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'sdiv_uniformconstnegpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -1066,59 +1066,59 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; FAST-LABEL: 'udiv_uniformconstnegpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'udiv_uniformconstnegpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'udiv_uniformconstnegpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll index 911b4319eaa4e7..ab9d7f9dc859de 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -314,146 +314,146 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 { define amdgpu_kernel void @rcp_ieee() #0 { ; CIFASTF64-LABEL: 'rcp_ieee' ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CISLOWF64-LABEL: 'rcp_ieee' ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIFASTF64-LABEL: 'rcp_ieee' ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SISLOWF64-LABEL: 'rcp_ieee' ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'rcp_ieee' ; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'rcp_ieee' ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SI-SIZE-LABEL: 'rcp_ieee' ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'rcp_ieee' ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half 1.0, undef @@ -477,146 +477,146 @@ define amdgpu_kernel void @rcp_ieee() #0 { define amdgpu_kernel void @rcp_ftzdaz() #1 { ; CIFASTF64-LABEL: 'rcp_ftzdaz' ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CISLOWF64-LABEL: 'rcp_ftzdaz' ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIFASTF64-LABEL: 'rcp_ftzdaz' ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SISLOWF64-LABEL: 'rcp_ftzdaz' ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'rcp_ftzdaz' ; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'rcp_ftzdaz' ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SI-SIZE-LABEL: 'rcp_ftzdaz' ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'rcp_ftzdaz' ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half 1.0, undef diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll index 9af5dd352d5883..734e76bd4053f5 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll @@ -139,24 +139,24 @@ define void @fneg_f64() { define i32 @fneg_idiom(i32 %arg) { ; CHECK-LABEL: 'fneg_idiom' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = fsub <8 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SIZE-LABEL: 'fneg_idiom' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> , undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll b/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll index 1bd73056e5b83d..32e8ba51d6887e 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll @@ -32,22 +32,22 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> , <2 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> splat (i1 true), <2 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or <2 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> , <2 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> splat (i1 true), <2 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or <2 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll index e6193791ff53a0..ca5d2fdd938b56 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll @@ -215,78 +215,78 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SLOW16-LABEL: 'mul_uniformconstpow2' ; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; FAST16-LABEL: 'mul_uniformconstpow2' ; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW16-SIZE-LABEL: 'mul_uniformconstpow2' ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; FAST16-SIZE-LABEL: 'mul_uniformconstpow2' ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -415,78 +415,78 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SLOW16-LABEL: 'mul_uniformconstnegpow2' ; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 -; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, -; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; FAST16-LABEL: 'mul_uniformconstnegpow2' ; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 -; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, -; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW16-SIZE-LABEL: 'mul_uniformconstnegpow2' ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; FAST16-SIZE-LABEL: 'mul_uniformconstnegpow2' ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AMDGPU/rem.ll b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll index 87e35c7627de05..7e37566db44172 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/rem.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll @@ -337,59 +337,59 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; FAST-LABEL: 'srem_uniformconst' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'srem_uniformconst' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'srem_uniformconst' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -418,59 +418,59 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; FAST-LABEL: 'urem_uniformconst' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'urem_uniformconst' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 7 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'urem_uniformconst' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -661,59 +661,59 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; FAST-LABEL: 'srem_uniformconstpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'srem_uniformconstpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'srem_uniformconstpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -742,59 +742,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; FAST-LABEL: 'urem_uniformconstpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'urem_uniformconstpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'urem_uniformconstpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -985,59 +985,59 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; FAST-LABEL: 'srem_uniformconstnegpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'srem_uniformconstnegpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'srem_uniformconstnegpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -1066,59 +1066,59 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; FAST-LABEL: 'urem_uniformconstnegpow2' ; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, -16 -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'urem_uniformconstnegpow2' ; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, -16 -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, -; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; ALL-SIZE-LABEL: 'urem_uniformconstnegpow2' ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll index 36c25850323274..9f0c29c8bb0c65 100644 --- a/llvm/test/Analysis/CostModel/ARM/divrem.ll +++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll @@ -1031,98 +1031,98 @@ define void @vf64() { define void @vi8_2() { ; CHECK-NEON-LABEL: 'vi8_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi8_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi8_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi8_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vi8_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t1 = sdiv <2 x i8> undef, @@ -1146,98 +1146,98 @@ define void @vi8_2() { define void @vi16_2() { ; CHECK-NEON-LABEL: 'vi16_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi16_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi16_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi16_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vi16_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t1 = sdiv <2 x i16> undef, @@ -1261,98 +1261,98 @@ define void @vi16_2() { define void @vi32_2() { ; CHECK-NEON-LABEL: 'vi32_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi32_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi32_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi32_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vi32_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t1 = sdiv <2 x i32> undef, @@ -1376,98 +1376,98 @@ define void @vi32_2() { define void @vi64_2() { ; CHECK-NEON-LABEL: 'vi64_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi64_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi64_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi64_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vi64_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t1 = sdiv <2 x i64> undef, @@ -1491,48 +1491,48 @@ define void @vi64_2() { define void @vf16_2() { ; CHECK-NEON-LABEL: 'vf16_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf16_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf16_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vf16_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vf16_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = fdiv <2 x half> undef, @@ -1546,48 +1546,48 @@ define void @vf16_2() { define void @vf32_2() { ; CHECK-NEON-LABEL: 'vf32_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf32_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf32_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vf32_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vf32_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = fdiv <2 x float> undef, @@ -1601,48 +1601,48 @@ define void @vf32_2() { define void @vf64_2() { ; CHECK-NEON-LABEL: 'vf64_2' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf64_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf64_2' -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vf64_2' -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vf64_2' -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00) +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00) ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = fdiv <2 x double> undef, diff --git a/llvm/test/Analysis/CostModel/ARM/logicalop.ll b/llvm/test/Analysis/CostModel/ARM/logicalop.ll index d70a4fac4d26c7..967426c727ea73 100644 --- a/llvm/test/Analysis/CostModel/ARM/logicalop.ll +++ b/llvm/test/Analysis/CostModel/ARM/logicalop.ll @@ -80,56 +80,56 @@ define void @vecop() { ; CHECK-MVE-RECIP-LABEL: 'vecop' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %band = and <4 x i1> undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-RECIP-LABEL: 'vecop' ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-THUMB1-RECIP-LABEL: 'vecop' ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB2-RECIP-LABEL: 'vecop' ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-MVE-SIZE-LABEL: 'vecop' ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-NEON-SIZE-LABEL: 'vecop' ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB1-SIZE-LABEL: 'vecop' ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB2-SIZE-LABEL: 'vecop' ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll b/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll index 495423b2c33458..3b0578d8f2d059 100644 --- a/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll +++ b/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll @@ -33,7 +33,7 @@ define void @vecop() { ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll index c976f483fdfec2..5afc199e52d43a 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll @@ -1173,9 +1173,9 @@ define void @add_of_constant() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = add <4 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = add <4 x i32> zeroinitializer, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = add <2 x i64> zeroinitializer, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> , undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> splat (i32 1), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> splat (i64 1), undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> splat (i32 4096), undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> , undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> , undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> , undef @@ -1191,9 +1191,9 @@ define void @add_of_constant() { ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = add <4 x i32> undef, undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = add <4 x i32> zeroinitializer, undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = add <2 x i64> zeroinitializer, undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> , undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> , undef -; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> , undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> splat (i32 1), undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> splat (i64 1), undef +; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> splat (i32 4096), undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> , undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> , undef ; SIFIVE-X280-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> , undef diff --git a/llvm/test/Analysis/CostModel/RISCV/logicalop.ll b/llvm/test/Analysis/CostModel/RISCV/logicalop.ll index 891c53b535dc61..80f66cfdc3e096 100644 --- a/llvm/test/Analysis/CostModel/RISCV/logicalop.ll +++ b/llvm/test/Analysis/CostModel/RISCV/logicalop.ll @@ -38,14 +38,14 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll index 2448056076d8b4..6fc98da0eea97e 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll @@ -514,9 +514,9 @@ define void @store_of_constant(ptr %p) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> , ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> , ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> , ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 @@ -532,9 +532,9 @@ define void @store_of_constant(ptr %p) { ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> , ptr %p, align 16 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> , ptr %p, align 32 -; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> , ptr %p, align 16 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 ; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> , ptr %p, align 16 diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll index 67b99f573aad21..344a34fa9a6309 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll @@ -24,7 +24,7 @@ define <4 x i8> @phi_v4i8_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ splat (i8 1), %a ], [ splat (i8 2), %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %x ; br i1 %c, label %a, label %b @@ -79,7 +79,7 @@ define <4 x i8> @phi_v4i8_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ splat (i8 1), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %x ; br i1 %c, label %a, label %b @@ -97,7 +97,7 @@ define <4 x i16> @phi_v4i16_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ splat (i16 1), %a ], [ splat (i16 2), %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %x ; br i1 %c, label %a, label %b @@ -152,7 +152,7 @@ define <4 x i16> @phi_v4i16_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ splat (i16 1), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %x ; br i1 %c, label %a, label %b @@ -170,7 +170,7 @@ define <4 x i32> @phi_v4i32_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ splat (i32 1), %a ], [ splat (i32 2), %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x ; br i1 %c, label %a, label %b @@ -225,7 +225,7 @@ define <4 x i32> @phi_v4i32_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ splat (i32 1), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x ; br i1 %c, label %a, label %b @@ -243,7 +243,7 @@ define <4 x i64> @phi_v4i64_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ splat (i64 1), %a ], [ splat (i64 2), %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %x ; br i1 %c, label %a, label %b @@ -298,7 +298,7 @@ define <4 x i64> @phi_v4i64_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ splat (i64 1), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %x ; br i1 %c, label %a, label %b @@ -316,7 +316,7 @@ define <4 x half> @phi_v4f16_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ splat (half 0xH3C00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %x ; br i1 %c, label %a, label %b @@ -353,7 +353,7 @@ define <4 x half> @phi_v4f16_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ splat (half 0xH3C00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %x ; br i1 %c, label %a, label %b @@ -371,7 +371,7 @@ define <4 x float> @phi_v4f32_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ splat (float 1.000000e+00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %x ; br i1 %c, label %a, label %b @@ -408,7 +408,7 @@ define <4 x float> @phi_v4f32_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ splat (float 1.000000e+00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %x ; br i1 %c, label %a, label %b @@ -426,7 +426,7 @@ define <4 x double> @phi_v4f64_splat(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ splat (double 1.000000e+00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %x ; br i1 %c, label %a, label %b @@ -463,7 +463,7 @@ define <4 x double> @phi_v4f64_cheap_and_expensive(i1 %c) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %d -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ , %a ], [ , %b ] +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ splat (double 1.000000e+00), %a ], [ , %b ] ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %x ; br i1 %c, label %a, label %b diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll index ef17d8dc60c145..6ab8ac64d64e4a 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll @@ -393,7 +393,7 @@ define void @select() { define void @select_of_constants() { ; CHECK-LABEL: 'select_of_constants' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> splat (i64 128), <2 x i64> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %3 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = select i1 undef, <2 x i64> , <2 x i64> diff --git a/llvm/test/Analysis/CostModel/SystemZ/divrem-pow2.ll b/llvm/test/Analysis/CostModel/SystemZ/divrem-pow2.ll index b43a46c882805f..ebc8d83b7ac6e2 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/divrem-pow2.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/divrem-pow2.ll @@ -85,7 +85,7 @@ define i8 @fun7(i8 %a) { define <2 x i64> @fun8(<2 x i64> %a) { ; COST-LABEL: 'fun8' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, splat (i64 2) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = sdiv <2 x i64> %a, @@ -94,7 +94,7 @@ define <2 x i64> @fun8(<2 x i64> %a) { define <2 x i64> @fun9(<2 x i64> %a) { ; COST-LABEL: 'fun9' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, splat (i64 -4) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = sdiv <2 x i64> %a, @@ -103,7 +103,7 @@ define <2 x i64> @fun9(<2 x i64> %a) { define <4 x i32> @fun10(<4 x i32> %a) { ; COST-LABEL: 'fun10' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = sdiv <4 x i32> %a, @@ -112,7 +112,7 @@ define <4 x i32> @fun10(<4 x i32> %a) { define <4 x i32> @fun11(<4 x i32> %a) { ; COST-LABEL: 'fun11' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, splat (i32 -16) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = sdiv <4 x i32> %a, @@ -121,7 +121,7 @@ define <4 x i32> @fun11(<4 x i32> %a) { define <2 x i32> @fun12(<2 x i32> %a) { ; COST-LABEL: 'fun12' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i32> %a, splat (i32 -16) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = sdiv <2 x i32> %a, @@ -130,7 +130,7 @@ define <2 x i32> @fun12(<2 x i32> %a) { define <8 x i16> @fun13(<8 x i16> %a) { ; COST-LABEL: 'fun13' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = sdiv <8 x i16> %a, @@ -139,7 +139,7 @@ define <8 x i16> @fun13(<8 x i16> %a) { define <8 x i16> @fun14(<8 x i16> %a) { ; COST-LABEL: 'fun14' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, splat (i16 -64) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = sdiv <8 x i16> %a, @@ -148,7 +148,7 @@ define <8 x i16> @fun14(<8 x i16> %a) { define <4 x i16> @fun15(<4 x i16> %a) { ; COST-LABEL: 'fun15' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r ; %r = sdiv <4 x i16> %a, @@ -157,7 +157,7 @@ define <4 x i16> @fun15(<4 x i16> %a) { define <16 x i8> @fun16(<16 x i8> %a) { ; COST-LABEL: 'fun16' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, splat (i8 64) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = sdiv <16 x i8> %a, @@ -166,7 +166,7 @@ define <16 x i8> @fun16(<16 x i8> %a) { define <16 x i8> @fun17(<16 x i8> %a) { ; COST-LABEL: 'fun17' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = sdiv <16 x i8> %a, @@ -175,7 +175,7 @@ define <16 x i8> @fun17(<16 x i8> %a) { define <8 x i8> @fun18(<8 x i8> %a) { ; COST-LABEL: 'fun18' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r ; %r = sdiv <8 x i8> %a, @@ -224,7 +224,7 @@ define i8 @fun22(i8 %a) { define <2 x i64> @fun23(<2 x i64> %a) { ; COST-LABEL: 'fun23' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i64> %a, splat (i64 2) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = udiv <2 x i64> %a, @@ -233,7 +233,7 @@ define <2 x i64> @fun23(<2 x i64> %a) { define <4 x i32> @fun24(<4 x i32> %a) { ; COST-LABEL: 'fun24' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = udiv <4 x i32> %a, @@ -242,7 +242,7 @@ define <4 x i32> @fun24(<4 x i32> %a) { define <2 x i32> @fun25(<2 x i32> %a) { ; COST-LABEL: 'fun25' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = udiv <2 x i32> %a, @@ -251,7 +251,7 @@ define <2 x i32> @fun25(<2 x i32> %a) { define <8 x i16> @fun26(<8 x i16> %a) { ; COST-LABEL: 'fun26' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = udiv <8 x i16> %a, @@ -260,7 +260,7 @@ define <8 x i16> @fun26(<8 x i16> %a) { define <4 x i16> @fun27(<4 x i16> %a) { ; COST-LABEL: 'fun27' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r ; %r = udiv <4 x i16> %a, @@ -269,7 +269,7 @@ define <4 x i16> @fun27(<4 x i16> %a) { define <16 x i8> @fun28(<16 x i8> %a) { ; COST-LABEL: 'fun28' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <16 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = udiv <16 x i8> %a, @@ -278,7 +278,7 @@ define <16 x i8> @fun28(<16 x i8> %a) { define <8 x i8> @fun29(<8 x i8> %a) { ; COST-LABEL: 'fun29' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r ; %r = udiv <8 x i8> %a, @@ -363,7 +363,7 @@ define i8 @fun37(i8 %a) { define <2 x i64> @fun38(<2 x i64> %a) { ; COST-LABEL: 'fun38' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i64> %a, splat (i64 2) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = srem <2 x i64> %a, @@ -372,7 +372,7 @@ define <2 x i64> @fun38(<2 x i64> %a) { define <2 x i64> @fun39(<2 x i64> %a) { ; COST-LABEL: 'fun39' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i64> %a, splat (i64 -4) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = srem <2 x i64> %a, @@ -381,7 +381,7 @@ define <2 x i64> @fun39(<2 x i64> %a) { define <4 x i32> @fun40(<4 x i32> %a) { ; COST-LABEL: 'fun40' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = srem <4 x i32> %a, @@ -390,7 +390,7 @@ define <4 x i32> @fun40(<4 x i32> %a) { define <4 x i32> @fun41(<4 x i32> %a) { ; COST-LABEL: 'fun41' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i32> %a, splat (i32 -16) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = srem <4 x i32> %a, @@ -399,7 +399,7 @@ define <4 x i32> @fun41(<4 x i32> %a) { define <2 x i32> @fun42(<2 x i32> %a) { ; COST-LABEL: 'fun42' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <2 x i32> %a, splat (i32 -16) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = srem <2 x i32> %a, @@ -408,7 +408,7 @@ define <2 x i32> @fun42(<2 x i32> %a) { define <8 x i16> @fun43(<8 x i16> %a) { ; COST-LABEL: 'fun43' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = srem <8 x i16> %a, @@ -417,7 +417,7 @@ define <8 x i16> @fun43(<8 x i16> %a) { define <8 x i16> @fun44(<8 x i16> %a) { ; COST-LABEL: 'fun44' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i16> %a, splat (i16 -64) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = srem <8 x i16> %a, @@ -426,7 +426,7 @@ define <8 x i16> @fun44(<8 x i16> %a) { define <4 x i16> @fun45(<4 x i16> %a) { ; COST-LABEL: 'fun45' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <4 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r ; %r = srem <4 x i16> %a, @@ -435,7 +435,7 @@ define <4 x i16> @fun45(<4 x i16> %a) { define <16 x i8> @fun46(<16 x i8> %a) { ; COST-LABEL: 'fun46' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <16 x i8> %a, splat (i8 64) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = srem <16 x i8> %a, @@ -444,7 +444,7 @@ define <16 x i8> @fun46(<16 x i8> %a) { define <16 x i8> @fun47(<16 x i8> %a) { ; COST-LABEL: 'fun47' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <16 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = srem <16 x i8> %a, @@ -453,7 +453,7 @@ define <16 x i8> @fun47(<16 x i8> %a) { define <8 x i8> @fun48(<8 x i8> %a) { ; COST-LABEL: 'fun48' -; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = srem <8 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r ; %r = srem <8 x i8> %a, @@ -502,7 +502,7 @@ define i8 @fun52(i8 %a) { define <2 x i64> @fun53(<2 x i64> %a) { ; COST-LABEL: 'fun53' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <2 x i64> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <2 x i64> %a, splat (i64 2) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r ; %r = urem <2 x i64> %a, @@ -511,7 +511,7 @@ define <2 x i64> @fun53(<2 x i64> %a) { define <4 x i32> @fun54(<4 x i32> %a) { ; COST-LABEL: 'fun54' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <4 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <4 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r ; %r = urem <4 x i32> %a, @@ -520,7 +520,7 @@ define <4 x i32> @fun54(<4 x i32> %a) { define <2 x i32> @fun55(<2 x i32> %a) { ; COST-LABEL: 'fun55' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <2 x i32> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <2 x i32> %a, splat (i32 8) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r ; %r = urem <2 x i32> %a, @@ -529,7 +529,7 @@ define <2 x i32> @fun55(<2 x i32> %a) { define <8 x i16> @fun56(<8 x i16> %a) { ; COST-LABEL: 'fun56' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <8 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <8 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r ; %r = urem <8 x i16> %a, @@ -538,7 +538,7 @@ define <8 x i16> @fun56(<8 x i16> %a) { define <4 x i16> @fun57(<4 x i16> %a) { ; COST-LABEL: 'fun57' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <4 x i16> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <4 x i16> %a, splat (i16 32) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r ; %r = urem <4 x i16> %a, @@ -547,7 +547,7 @@ define <4 x i16> @fun57(<4 x i16> %a) { define <16 x i8> @fun58(<16 x i8> %a) { ; COST-LABEL: 'fun58' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <16 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <16 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r ; %r = urem <16 x i8> %a, @@ -556,7 +556,7 @@ define <16 x i8> @fun58(<16 x i8> %a) { define <8 x i8> @fun59(<8 x i8> %a) { ; COST-LABEL: 'fun59' -; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <8 x i8> %a, +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = urem <8 x i8> %a, splat (i8 -128) ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r ; %r = urem <8 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/SystemZ/logicalop.ll b/llvm/test/Analysis/CostModel/SystemZ/logicalop.ll index 459ed649eb3f3d..3c3fa0b530dd80 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/logicalop.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/logicalop.ll @@ -30,14 +30,14 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll index b965a726262e68..f5b434881436ed 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll @@ -130,46 +130,46 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; SSE1-LABEL: 'fneg_idiom' ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE2-LABEL: 'fneg_idiom' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'fneg_idiom' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fneg_idiom' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll index c147bd2eef6e7e..74fecc6c4a5747 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll @@ -218,90 +218,90 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; SSE1-LABEL: 'fneg_idiom' ; SSE1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE2-LABEL: 'fneg_idiom' ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fneg_idiom' ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'fneg_idiom' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fneg_idiom' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fneg_idiom' ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'fneg_idiom' ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'fneg_idiom' ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll index d9312ac05601de..a801a187b46cbd 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll @@ -196,79 +196,79 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; SSE1-LABEL: 'fneg_idiom' ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE2-LABEL: 'fneg_idiom' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fneg_idiom' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'fneg_idiom' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fneg_idiom' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'fneg_idiom' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; GLM-LABEL: 'fneg_idiom' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll index 90871e3a3831c0..d26c856f270762 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -218,90 +218,90 @@ define i32 @fsub(i32 %arg) { define i32 @fneg_idiom(i32 %arg) { ; SSE1-LABEL: 'fneg_idiom' ; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE2-LABEL: 'fneg_idiom' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fneg_idiom' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'fneg_idiom' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fneg_idiom' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fneg_idiom' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> , undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> , undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fneg_idiom' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> , undef -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> , undef +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'fneg_idiom' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> , undef -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> , undef +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fsub float -0.0, undef diff --git a/llvm/test/Analysis/CostModel/X86/div-codesize.ll b/llvm/test/Analysis/CostModel/X86/div-codesize.ll index 801b5f4e9ec06e..88c4a5dc185037 100644 --- a/llvm/test/Analysis/CostModel/X86/div-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/div-codesize.ll @@ -186,21 +186,21 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; CHECK-LABEL: 'sdiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; CHECK-LABEL: 'udiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -548,97 +548,97 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE-LABEL: 'sdiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -667,97 +667,97 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; SSE-LABEL: 'udiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -872,21 +872,21 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -915,21 +915,21 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; CHECK-LABEL: 'udiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/div-latency.ll b/llvm/test/Analysis/CostModel/X86/div-latency.ll index 6afb9c790e3520..aada70124a9b47 100644 --- a/llvm/test/Analysis/CostModel/X86/div-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/div-latency.ll @@ -186,21 +186,21 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; CHECK-LABEL: 'sdiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; CHECK-LABEL: 'udiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -586,116 +586,116 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE-LABEL: 'sdiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'sdiv_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -724,116 +724,116 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; SSE-LABEL: 'udiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'udiv_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -948,21 +948,21 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -991,21 +991,21 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; CHECK-LABEL: 'udiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll index d54dae0a21a066..efb853e3721b54 100644 --- a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll @@ -186,21 +186,21 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; CHECK-LABEL: 'sdiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; CHECK-LABEL: 'udiv_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -548,97 +548,97 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE-LABEL: 'sdiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -667,97 +667,97 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; SSE-LABEL: 'udiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -872,21 +872,21 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -915,21 +915,21 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; CHECK-LABEL: 'udiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll index 4bbd70e87a1a95..8ffd6835004f91 100644 --- a/llvm/test/Analysis/CostModel/X86/div.ll +++ b/llvm/test/Analysis/CostModel/X86/div.ll @@ -376,97 +376,97 @@ define i32 @udiv_const() { define i32 @sdiv_uniformconst() { ; SSE-LABEL: 'sdiv_uniformconst' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconst' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconst' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconst' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconst' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -495,97 +495,97 @@ define i32 @sdiv_uniformconst() { define i32 @udiv_uniformconst() { ; SSE-LABEL: 'udiv_uniformconst' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconst' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconst' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconst' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconst' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -928,135 +928,135 @@ define i32 @udiv_constpow2() { define i32 @sdiv_uniformconstpow2() { ; SSE2-LABEL: 'sdiv_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sdiv_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'sdiv_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -1085,97 +1085,97 @@ define i32 @sdiv_uniformconstpow2() { define i32 @udiv_uniformconstpow2() { ; SSE-LABEL: 'udiv_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -1480,97 +1480,97 @@ define i32 @udiv_constnegpow2() { define i32 @sdiv_uniformconstnegpow2() { ; SSE-LABEL: 'sdiv_uniformconstnegpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sdiv_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sdiv_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, -16 @@ -1599,97 +1599,97 @@ define i32 @sdiv_uniformconstnegpow2() { define i32 @udiv_uniformconstnegpow2() { ; SSE-LABEL: 'udiv_uniformconstnegpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'udiv_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'udiv_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll b/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll index 71927002b599fd..fccabe39cd218f 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll @@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) @@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) @@ -2602,51 +2602,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) @@ -2659,51 +2659,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) @@ -2716,79 +2716,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) @@ -2801,79 +2801,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshl-latency.ll b/llvm/test/Analysis/CostModel/X86/fshl-latency.ll index c40394ba9a7283..a82a07d8ce54d9 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl-latency.ll @@ -1217,86 +1217,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) @@ -1309,79 +1309,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1394,79 +1394,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1479,79 +1479,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) @@ -2554,51 +2554,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) @@ -2611,51 +2611,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) @@ -2668,79 +2668,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) @@ -2753,79 +2753,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll index 7b0daf50485505..c5e99b36e87ab1 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll @@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) @@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) @@ -2786,79 +2786,79 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) @@ -2871,79 +2871,79 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) @@ -2956,79 +2956,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) @@ -3041,79 +3041,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll index 127dec0a1a6f78..0ee9fff1229e5c 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl.ll @@ -1210,86 +1210,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7) @@ -1302,79 +1302,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5) @@ -1387,79 +1387,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3) @@ -1472,79 +1472,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3) @@ -2540,51 +2540,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7) @@ -2597,51 +2597,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5) @@ -2654,79 +2654,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3) @@ -2739,79 +2739,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll index 92a20b938142a7..0e4b635e863512 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll @@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) @@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) @@ -2602,51 +2602,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) @@ -2659,51 +2659,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) @@ -2716,79 +2716,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) @@ -2801,79 +2801,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshr-latency.ll b/llvm/test/Analysis/CostModel/X86/fshr-latency.ll index 33fadef536bf74..b71fd6c38fc7f3 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr-latency.ll @@ -1217,86 +1217,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) @@ -1309,79 +1309,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1394,79 +1394,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1479,79 +1479,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) @@ -2554,51 +2554,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) @@ -2611,51 +2611,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) @@ -2668,79 +2668,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) @@ -2753,79 +2753,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll index ef831328c480e7..9b848d42a6f288 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll @@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) @@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i8' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i8' ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) @@ -2786,79 +2786,79 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) @@ -2871,79 +2871,79 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) @@ -2956,79 +2956,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) @@ -3041,79 +3041,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll index 3c233b51053ddc..2b970ba50c086e 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr.ll @@ -1210,86 +1210,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) { ; SSSE3-LABEL: 'splatconstant_funnel_i64' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'splatconstant_funnel_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i64' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i64' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7) @@ -1302,79 +1302,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i32' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i32' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i32' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5) @@ -1387,79 +1387,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3) @@ -1472,79 +1472,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) { ; SSE-LABEL: 'splatconstant_funnel_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_funnel_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_funnel_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_funnel_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_funnel_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3) @@ -2540,51 +2540,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i64' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i64' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7) @@ -2597,51 +2597,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'splatconstant_rotate_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i32' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i32' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i32' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5) @@ -2654,79 +2654,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i16' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i16' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i16' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i16' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i16' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3) @@ -2739,79 +2739,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) { ; SSE-LABEL: 'splatconstant_rotate_i8' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'splatconstant_rotate_i8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'splatconstant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatconstant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatconstant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatconstant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8' ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512VBMI2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatconstant_rotate_i8' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'splatconstant_rotate_i8' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; XOP-LABEL: 'splatconstant_rotate_i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; XOP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8' ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3)) +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3)) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3) diff --git a/llvm/test/Analysis/CostModel/X86/logicalop.ll b/llvm/test/Analysis/CostModel/X86/logicalop.ll index d932bcd040eff0..75890d26b44314 100644 --- a/llvm/test/Analysis/CostModel/X86/logicalop.ll +++ b/llvm/test/Analysis/CostModel/X86/logicalop.ll @@ -41,14 +41,14 @@ define void @vecop() { ; CHECK-THROUGHPUT-LABEL: 'vecop' ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'vecop' ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> , <4 x i1> undef +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll index 1e5c02afc2b3b4..47472d57228585 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll index 25d12da306aba3..e18a4d0467caf8 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index 332d90ac4191c4..4ca1f7f419e6a5 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll index 14dc561edc34d3..4fb2f1a50a6ed2 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll index a030068dfaf525..6be81602ab15bd 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll @@ -1850,31 +1850,31 @@ define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %s define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { ; SSE2-LABEL: 'test_gather_4i32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SSE42-LABEL: 'test_gather_4i32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) @@ -1885,37 +1885,37 @@ define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %sext_ind = sext <16 x i32> %ind to <16 x i64> @@ -2019,7 +2019,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask2' @@ -2027,7 +2027,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask2' @@ -2035,7 +2035,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -2043,7 +2043,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -2051,7 +2051,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; ; AVX512-LABEL: 'test_gather_16f32_const_mask2' @@ -2059,7 +2059,7 @@ define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res ; %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 @@ -2230,43 +2230,43 @@ define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { ; SSE2-LABEL: 'test_gather_4f32_const_mask' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SSE42-LABEL: 'test_gather_4f32_const_mask' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res ; %sext_ind = sext <4 x i32> %ind to <4 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/mul-codesize.ll b/llvm/test/Analysis/CostModel/X86/mul-codesize.ll index 4cf1df14387f86..75585cf0a6e85c 100644 --- a/llvm/test/Analysis/CostModel/X86/mul-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/mul-codesize.ll @@ -190,154 +190,154 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SSE2-LABEL: 'mul_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SSE2-LABEL: 'mul_uniformconstnegpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstnegpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstnegpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstnegpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/mul-latency.ll b/llvm/test/Analysis/CostModel/X86/mul-latency.ll index 176c3ed41bfa8e..9245f2c5740f36 100644 --- a/llvm/test/Analysis/CostModel/X86/mul-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/mul-latency.ll @@ -190,154 +190,154 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SSE2-LABEL: 'mul_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SSE2-LABEL: 'mul_uniformconstnegpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstnegpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstnegpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstnegpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll index 020b62291012cc..2d1bf23e9699c9 100644 --- a/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll @@ -190,154 +190,154 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SSE2-LABEL: 'mul_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SSE2-LABEL: 'mul_uniformconstnegpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstnegpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstnegpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstnegpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/mul.ll b/llvm/test/Analysis/CostModel/X86/mul.ll index 7e51dbf60cf9b3..4602928a981eac 100644 --- a/llvm/test/Analysis/CostModel/X86/mul.ll +++ b/llvm/test/Analysis/CostModel/X86/mul.ll @@ -190,154 +190,154 @@ define i32 @mul_constpow2() { define i32 @mul_uniformconstpow2() { ; SSE2-LABEL: 'mul_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = mul i64 undef, 16 @@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() { define i32 @mul_uniformconstnegpow2() { ; SSE2-LABEL: 'mul_uniformconstnegpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'mul_uniformconstnegpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'mul_uniformconstnegpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'mul_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'mul_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'mul_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'mul_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'mul_uniformconstnegpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16) +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = mul i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll index b47580ba0abfd1..3c172dda6867b6 100644 --- a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll @@ -186,21 +186,21 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; CHECK-LABEL: 'srem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; CHECK-LABEL: 'urem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -529,154 +529,154 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'srem_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -705,59 +705,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; SSE-LABEL: 'urem_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'urem_uniformconstpow2' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'urem_uniformconstpow2' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -872,21 +872,21 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -915,21 +915,21 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; CHECK-LABEL: 'urem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/rem-latency.ll b/llvm/test/Analysis/CostModel/X86/rem-latency.ll index 5264a75659d150..6c62ccb8df7188 100644 --- a/llvm/test/Analysis/CostModel/X86/rem-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/rem-latency.ll @@ -186,21 +186,21 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; CHECK-LABEL: 'srem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; CHECK-LABEL: 'urem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -510,135 +510,135 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -667,59 +667,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; SSE-LABEL: 'urem_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'urem_uniformconstpow2' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'urem_uniformconstpow2' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -834,21 +834,21 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -877,21 +877,21 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; CHECK-LABEL: 'urem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll index 817a0c7e070ac6..e0f0674c71f604 100644 --- a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll @@ -186,21 +186,21 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; CHECK-LABEL: 'srem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -229,21 +229,21 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; CHECK-LABEL: 'urem_uniformconst' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -510,135 +510,135 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -667,59 +667,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; SSE-LABEL: 'urem_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX-LABEL: 'urem_uniformconstpow2' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX512-LABEL: 'urem_uniformconstpow2' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -834,21 +834,21 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -877,21 +877,21 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; CHECK-LABEL: 'urem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll index 0e799d63741aa8..4138046ff961ed 100644 --- a/llvm/test/Analysis/CostModel/X86/rem.ll +++ b/llvm/test/Analysis/CostModel/X86/rem.ll @@ -490,97 +490,97 @@ define i32 @urem_const() { define i32 @srem_uniformconst() { ; SSE-LABEL: 'srem_uniformconst' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconst' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconst' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconst' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconst' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -609,97 +609,97 @@ define i32 @srem_uniformconst() { define i32 @urem_uniformconst() { ; SSE-LABEL: 'urem_uniformconst' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'urem_uniformconst' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'urem_uniformconst' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_uniformconst' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_uniformconst' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -1004,173 +1004,173 @@ define i32 @urem_constpow2() { define i32 @srem_uniformconstpow2() { ; SSE2-LABEL: 'srem_uniformconstpow2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'srem_uniformconstpow2' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'srem_uniformconstpow2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_uniformconstpow2' ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = srem <2 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = srem <4 x i64> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = srem <8 x i64> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = srem <16 x i32> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'srem_uniformconstpow2' ; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16) ; GLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16) ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16 -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, -; GLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, +; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16) +; GLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -1199,59 +1199,59 @@ define i32 @srem_uniformconstpow2() { define i32 @urem_uniformconstpow2() { ; SSE-LABEL: 'urem_uniformconstpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'urem_uniformconstpow2' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'urem_uniformconstpow2' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 16 @@ -1594,97 +1594,97 @@ define i32 @urem_constnegpow2() { define i32 @srem_uniformconstnegpow2() { ; SSE-LABEL: 'srem_uniformconstnegpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'srem_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'srem_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, -16 @@ -1713,97 +1713,97 @@ define i32 @srem_uniformconstnegpow2() { define i32 @urem_uniformconstnegpow2() { ; SSE-LABEL: 'urem_uniformconstnegpow2' ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, -; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'urem_uniformconstnegpow2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'urem_uniformconstnegpow2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'urem_uniformconstnegpow2' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'urem_uniformconstnegpow2' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, -16 diff --git a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll index 204290bd4e40f2..39a74cf6509bcd 100644 --- a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll +++ b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll @@ -77,12 +77,12 @@ entry: define <4 x i32> @slm-costs_8_v4_zext_mul(<4 x i8> %a) { ; SLM-LABEL: 'slm-costs_8_v4_zext_mul' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 255) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_8_v4_zext_mul' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 255) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: @@ -242,12 +242,12 @@ entry: define <4 x i32> @slm-costs_16_v4_zext_mul(<4 x i16> %a) { ; SLM-LABEL: 'slm-costs_16_v4_zext_mul' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i16> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 65535) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_16_v4_zext_mul' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i16> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 65535) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: diff --git a/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll b/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll index 0986def7cef40d..495cc74b643c49 100644 --- a/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll @@ -10,7 +10,7 @@ define <4 x i32> @test1(<4 x i32> %a) { ; CHECK-LABEL: 'test1' -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <4 x i32> %a, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div ; %div = udiv <4 x i32> %a, @@ -19,19 +19,19 @@ define <4 x i32> @test1(<4 x i32> %a) { define <8 x i32> @test2(<8 x i32> %a) { ; SSE-LABEL: 'test2' -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %div = udiv <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %div = udiv <8 x i32> %a, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX1-LABEL: 'test2' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <8 x i32> %a, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX2-LABEL: 'test2' -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX512-LABEL: 'test2' -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, splat (i32 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; %div = udiv <8 x i32> %a, @@ -40,7 +40,7 @@ define <8 x i32> @test2(<8 x i32> %a) { define <8 x i16> @test3(<8 x i16> %a) { ; CHECK-LABEL: 'test3' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <8 x i16> %a, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div ; %div = udiv <8 x i16> %a, @@ -49,19 +49,19 @@ define <8 x i16> @test3(<8 x i16> %a) { define <16 x i16> @test4(<16 x i16> %a) { ; SSE-LABEL: 'test4' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <16 x i16> %a, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX1-LABEL: 'test4' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = udiv <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = udiv <16 x i16> %a, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX2-LABEL: 'test4' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX512-LABEL: 'test4' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, splat (i16 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; %div = udiv <16 x i16> %a, @@ -70,7 +70,7 @@ define <16 x i16> @test4(<16 x i16> %a) { define <8 x i16> @test5(<8 x i16> %a) { ; CHECK-LABEL: 'test5' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i16> %a, splat (i16 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div ; %div = sdiv <8 x i16> %a, @@ -79,19 +79,19 @@ define <8 x i16> @test5(<8 x i16> %a) { define <16 x i16> @test6(<16 x i16> %a) { ; SSE-LABEL: 'test6' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX1-LABEL: 'test6' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX2-LABEL: 'test6' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; ; AVX512-LABEL: 'test6' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div ; %div = sdiv <16 x i16> %a, @@ -100,7 +100,7 @@ define <16 x i16> @test6(<16 x i16> %a) { define <16 x i8> @test7(<16 x i8> %a) { ; CHECK-LABEL: 'test7' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i8> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i8> %a, splat (i8 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div ; %div = sdiv <16 x i8> %a, @@ -109,7 +109,7 @@ define <16 x i8> @test7(<16 x i8> %a) { define <4 x i32> @test8(<4 x i32> %a) { ; CHECK-LABEL: 'test8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <4 x i32> %a, splat (i32 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div ; %div = sdiv <4 x i32> %a, @@ -118,19 +118,19 @@ define <4 x i32> @test8(<4 x i32> %a) { define <8 x i32> @test9(<8 x i32> %a) { ; SSE-LABEL: 'test9' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX1-LABEL: 'test9' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX2-LABEL: 'test9' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; ; AVX512-LABEL: 'test9' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div ; %div = sdiv <8 x i32> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll index 9ff975665f13b6..195c61921219f0 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll @@ -1391,23 +1391,23 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1416,31 +1416,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1449,31 +1449,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1482,7 +1482,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1491,31 +1491,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1524,31 +1524,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1557,7 +1557,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1566,39 +1566,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1607,39 +1607,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1648,35 +1648,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1685,35 +1685,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v32i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1722,35 +1722,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v64i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll index 67679f2cb85666..a3e4cf62764208 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll @@ -1509,19 +1509,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1530,27 +1530,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1559,27 +1559,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1588,7 +1588,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1597,27 +1597,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1626,27 +1626,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1655,7 +1655,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1664,43 +1664,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1709,43 +1709,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1754,43 +1754,43 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1799,43 +1799,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1844,43 +1844,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index efd378ee0b5a61..1bdab183e548c5 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -1509,19 +1509,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1530,27 +1530,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1559,27 +1559,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1588,7 +1588,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1597,27 +1597,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1626,27 +1626,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1655,7 +1655,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1664,43 +1664,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1709,43 +1709,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1754,43 +1754,43 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1799,43 +1799,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1844,43 +1844,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll index ab300779b4341c..f96d0d4ef43cc2 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll @@ -1469,23 +1469,23 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1494,31 +1494,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1527,31 +1527,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1560,31 +1560,31 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1593,31 +1593,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1626,31 +1626,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1659,31 +1659,31 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1692,39 +1692,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1733,39 +1733,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1774,39 +1774,39 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1815,39 +1815,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1856,39 +1856,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll index 1b51a2e0a1e6e5..c85f506d6b60ca 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll @@ -1415,23 +1415,23 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX-LABEL: 'splatconstant_shift_v2i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOP-LABEL: 'splatconstant_shift_v2i64' -; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1440,31 +1440,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1473,31 +1473,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1506,7 +1506,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1515,31 +1515,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1548,31 +1548,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1581,7 +1581,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1590,39 +1590,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1631,39 +1631,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1672,35 +1672,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1709,39 +1709,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1750,39 +1750,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll index 644fcbbfefdf95..dc6feadbbe17ad 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll @@ -1359,7 +1359,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1368,31 +1368,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1401,31 +1401,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1434,7 +1434,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1443,31 +1443,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1476,31 +1476,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1509,7 +1509,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1518,39 +1518,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1559,39 +1559,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1600,35 +1600,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1637,39 +1637,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1678,39 +1678,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll index 34ea1a644f6cca..348ffebf220e0a 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll @@ -1477,7 +1477,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1486,27 +1486,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1515,27 +1515,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1544,7 +1544,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1553,27 +1553,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1582,27 +1582,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1611,7 +1611,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1620,43 +1620,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1665,43 +1665,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1710,23 +1710,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1735,43 +1735,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1780,43 +1780,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 93b844eddf1824..7a7187dc9d6429 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -1477,7 +1477,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1486,27 +1486,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1515,27 +1515,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1544,7 +1544,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1553,27 +1553,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1582,27 +1582,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1611,7 +1611,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1620,43 +1620,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1665,43 +1665,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1710,23 +1710,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1735,43 +1735,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1780,43 +1780,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll index f879a09d067e65..b9accec482622d 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll @@ -1473,27 +1473,27 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v2i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v2i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1502,27 +1502,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1531,27 +1531,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1560,27 +1560,27 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1589,27 +1589,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1618,27 +1618,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1647,27 +1647,27 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1676,35 +1676,35 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1713,35 +1713,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1750,35 +1750,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1787,35 +1787,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1824,35 +1824,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll index fe472342e21445..87c0aee4a564de 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll @@ -1375,7 +1375,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1384,31 +1384,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1417,31 +1417,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1450,7 +1450,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1459,31 +1459,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1492,31 +1492,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1525,7 +1525,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1534,39 +1534,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1575,39 +1575,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1616,35 +1616,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1653,39 +1653,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1694,39 +1694,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll index 1045b827da7cad..2cf49533438f22 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll @@ -1333,7 +1333,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1342,31 +1342,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1375,31 +1375,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1408,7 +1408,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1417,31 +1417,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1450,31 +1450,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1483,7 +1483,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1492,39 +1492,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1533,39 +1533,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1574,35 +1574,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1611,39 +1611,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1652,39 +1652,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'splatconstant_shift_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; SSE42-LABEL: 'splatconstant_shift_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll index f093d1c8ca358a..0d4cd2648d8058 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll @@ -1557,7 +1557,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1566,27 +1566,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1595,27 +1595,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1624,7 +1624,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1633,27 +1633,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1662,27 +1662,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1691,7 +1691,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1700,43 +1700,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1745,43 +1745,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1790,23 +1790,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1815,43 +1815,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1860,43 +1860,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 09521cfa2d23d3..c2cbc32e328ecb 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -1557,7 +1557,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1566,27 +1566,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1595,27 +1595,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1624,7 +1624,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1633,27 +1633,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1662,27 +1662,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1691,7 +1691,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1700,43 +1700,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1745,43 +1745,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1790,23 +1790,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatconstant_shift_v16i8' -; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1815,43 +1815,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v32i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1860,43 +1860,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatconstant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll index 3ae71daf50a306..18fd15031d28aa 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll @@ -1433,27 +1433,27 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v2i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v2i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v2i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1462,27 +1462,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1491,27 +1491,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1520,27 +1520,27 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1549,27 +1549,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1578,27 +1578,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1607,27 +1607,27 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1636,35 +1636,35 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1673,35 +1673,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1710,35 +1710,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1747,35 +1747,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1784,35 +1784,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll index 4256a73a7cf735..ad2d92681ffa9c 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll @@ -1459,7 +1459,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { ; CHECK-LABEL: 'splatconstant_shift_v2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1468,27 +1468,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) { define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1497,27 +1497,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1526,7 +1526,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { ; CHECK-LABEL: 'splatconstant_shift_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, @@ -1535,27 +1535,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) { define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1564,27 +1564,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; ; AVX512-LABEL: 'splatconstant_shift_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1593,7 +1593,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { ; CHECK-LABEL: 'splatconstant_shift_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, @@ -1602,35 +1602,35 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) { define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1639,35 +1639,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i16' -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1676,31 +1676,31 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v16i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX-LABEL: 'splatconstant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v16i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v16i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1709,35 +1709,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v32i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v32i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v32i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1746,35 +1746,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; SSE-LABEL: 'splatconstant_shift_v64i8' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX1-LABEL: 'splatconstant_shift_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'splatconstant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'splatconstant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatconstant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8' -; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, +; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3) ; AVX512GFNI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, diff --git a/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll b/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll index 050e6f6735969f..863b08609fc60d 100644 --- a/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll +++ b/llvm/test/Analysis/DemandedBits/vectors-inseltpoison.ll @@ -3,7 +3,7 @@ ; CHECK-DAG: DemandedBits: 0xff00 for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xff00 for %y = or <2 x i32> %b, zeroinitializer ; CHECK-DAG: DemandedBits: 0xff00 for %z = or <2 x i32> %x, %y -; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, splat (i32 8) ; CHECK-DAG: DemandedBits: 0xff for %r = trunc <2 x i32> %u to <2 x i8> define <2 x i8> @test_basic(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer @@ -61,8 +61,8 @@ define <3 x i32> @test_shufflevector(<2 x i32> %a, <2 x i32> %b) { ; Shifts with splat shift amounts ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_shl(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = shl <2 x i32> %x, @@ -71,8 +71,8 @@ define <2 x i32> @test_shl(<2 x i32> %a) { } ; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_ashr(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = ashr <2 x i32> %x, @@ -81,8 +81,8 @@ define <2 x i32> @test_ashr(<2 x i32> %a) { } ; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_lshr(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = lshr <2 x i32> %x, @@ -95,8 +95,8 @@ declare <2 x i32> @llvm.fshr.i32(<2 x i32>, <2 x i32>, <2 x i32>) ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer -; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> splat (i32 4)) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, splat (i32 255) define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer %y = or <2 x i32> %b, zeroinitializer @@ -107,8 +107,8 @@ define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) { ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer -; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> splat (i32 28)) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, splat (i32 255) define <2 x i32> @test_fshr(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer %y = or <2 x i32> %b, zeroinitializer diff --git a/llvm/test/Analysis/DemandedBits/vectors.ll b/llvm/test/Analysis/DemandedBits/vectors.ll index 79c0d2792a1a0c..c7bcd4e7b078af 100644 --- a/llvm/test/Analysis/DemandedBits/vectors.ll +++ b/llvm/test/Analysis/DemandedBits/vectors.ll @@ -4,7 +4,7 @@ ; CHECK-DAG: DemandedBits: 0xff00 for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xff00 for %y = or <2 x i32> %b, zeroinitializer ; CHECK-DAG: DemandedBits: 0xff00 for %z = or <2 x i32> %x, %y -; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, splat (i32 8) ; CHECK-DAG: DemandedBits: 0xff for %r = trunc <2 x i32> %u to <2 x i8> define <2 x i8> @test_basic(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer @@ -66,8 +66,8 @@ define <3 x i32> @test_shufflevector(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_shl': ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_shl(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = shl <2 x i32> %x, @@ -77,8 +77,8 @@ define <2 x i32> @test_shl(<2 x i32> %a) { ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_ashr': ; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_ashr(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = ashr <2 x i32> %x, @@ -88,8 +88,8 @@ define <2 x i32> @test_ashr(<2 x i32> %a) { ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_lshr': ; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer -; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, +; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, splat (i32 4) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, splat (i32 240) define <2 x i32> @test_lshr(<2 x i32> %a) { %x = or <2 x i32> %a, zeroinitializer %y = lshr <2 x i32> %x, @@ -103,8 +103,8 @@ declare <2 x i32> @llvm.fshr.i32(<2 x i32>, <2 x i32>, <2 x i32>) ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_fshl': ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer -; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> splat (i32 4)) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, splat (i32 255) define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer %y = or <2 x i32> %b, zeroinitializer @@ -116,8 +116,8 @@ define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: Printing analysis 'Demanded Bits Analysis' for function 'test_fshr': ; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer ; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer -; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> ) -; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, +; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> splat (i32 28)) +; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, splat (i32 255) define <2 x i32> @test_fshr(<2 x i32> %a, <2 x i32> %b) { %x = or <2 x i32> %a, zeroinitializer %y = or <2 x i32> %b, zeroinitializer diff --git a/llvm/test/Analysis/ValueTracking/known-bits.ll b/llvm/test/Analysis/ValueTracking/known-bits.ll index 9d0b153d8ccfc3..5b71402a96f0df 100644 --- a/llvm/test/Analysis/ValueTracking/known-bits.ll +++ b/llvm/test/Analysis/ValueTracking/known-bits.ll @@ -3,7 +3,7 @@ define <4 x i1> @vec_reverse_known_bits(<4 x i8> %xx) { ; CHECK-LABEL: @vec_reverse_known_bits( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %x = or <4 x i8> %xx, %rev = call <4 x i8> @llvm.vector.reverse(<4 x i8> %x) diff --git a/llvm/test/Analysis/ValueTracking/known-fpclass.ll b/llvm/test/Analysis/ValueTracking/known-fpclass.ll index 2b8e6298d746ae..02289fd2641cf1 100644 --- a/llvm/test/Analysis/ValueTracking/known-fpclass.ll +++ b/llvm/test/Analysis/ValueTracking/known-fpclass.ll @@ -3,7 +3,7 @@ define <4 x i1> @vector_reverse_fpclass(<4 x double> nofpclass(nzero nan) %x) { ; CHECK-LABEL: @vector_reverse_fpclass( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) %op = call <4 x double> @llvm.vector.reverse(<4 x double> %x.abs) diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll index db2c4f3a1ed650..f02ff583e5cbb5 100644 --- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll +++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll @@ -210,7 +210,7 @@ define i1 @shl_nz_bounded_cnt(i32 %cnt, i32 %y) { define <2 x i1> @shl_nz_bounded_cnt_vec_todo_no_common_bit(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @shl_nz_bounded_cnt_vec_todo_no_common_bit( ; CHECK-NEXT: [[CNT:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[VAL:%.*]] = or <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[VAL:%.*]] = or <2 x i32> [[Y:%.*]], splat (i32 16) ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[VAL]], [[CNT]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[SHL]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -315,7 +315,7 @@ define i1 @lshr_nz_bounded_cnt_fail(i32 %cnt, i32 %y) { define <2 x i1> @ashr_nz_bounded_cnt_vec_fail(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @ashr_nz_bounded_cnt_vec_fail( -; CHECK-NEXT: [[CNT:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CNT:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 24) ; CHECK-NEXT: [[VAL:%.*]] = or <2 x i32> [[Y:%.*]], ; CHECK-NEXT: [[SHL:%.*]] = ashr <2 x i32> [[VAL]], [[CNT]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[SHL]], zeroinitializer @@ -648,7 +648,7 @@ define <2 x i1> @bitcast_veci8_to_veci16(<4 x i8> %xx, <2 x i16> %ind) { define <3 x i1> @bitcast_veci3_to_veci4_fail_not_multiple(<4 x i3> %xx, <3 x i4> %ind) { ; CHECK-LABEL: @bitcast_veci3_to_veci4_fail_not_multiple( -; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <4 x i3> [[XX:%.*]], +; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <4 x i3> [[XX:%.*]], splat (i3 1) ; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i3> [[XA]] to <3 x i4> ; CHECK-NEXT: [[Z:%.*]] = or <3 x i4> [[X]], [[IND:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i4> [[Z]], zeroinitializer @@ -663,7 +663,7 @@ define <3 x i1> @bitcast_veci3_to_veci4_fail_not_multiple(<4 x i3> %xx, <3 x i4> define <4 x i1> @bitcast_fail_veci16_to_veci8(<2 x i16> %xx, <4 x i8> %ind) { ; CHECK-LABEL: @bitcast_fail_veci16_to_veci8( -; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <2 x i16> [[XX:%.*]], +; CHECK-NEXT: [[XA:%.*]] = add nuw nsw <2 x i16> [[XX:%.*]], splat (i16 1) ; CHECK-NEXT: [[X:%.*]] = bitcast <2 x i16> [[XA]] to <4 x i8> ; CHECK-NEXT: [[Z:%.*]] = or <4 x i8> [[X]], [[IND:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[Z]], zeroinitializer @@ -1294,7 +1294,7 @@ false: define <2 x i1> @range_metadata_vec(ptr %p, <2 x i32> %x) { ; CHECK-LABEL: @range_metadata_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %v = load <2 x i32>, ptr %p, !range !{i32 1, i32 100} %or = or <2 x i32> %v, %x @@ -1377,7 +1377,7 @@ define i1 @neg_range_call(i8 %y) { define <2 x i1> @range_attr_vec(<2 x i8> range(i8 1, 0) %x, <2 x i8> %y) { ; CHECK-LABEL: @range_attr_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %or = or <2 x i8> %y, %x %cmp = icmp ne <2 x i8> %or, zeroinitializer @@ -1401,7 +1401,7 @@ declare range(i8 -1, 1) <2 x i8> @returns_contain_zero_range_helper_vec() define <2 x i1> @range_return_vec(<2 x i8> %y) { ; CHECK-LABEL: @range_return_vec( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @returns_non_zero_range_helper_vec() -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %x = call <2 x i8> @returns_non_zero_range_helper_vec() %or = or <2 x i8> %y, %x @@ -1427,7 +1427,7 @@ declare <2 x i8> @returns_i8_helper_vec() define <2 x i1> @range_call_vec(<2 x i8> %y) { ; CHECK-LABEL: @range_call_vec( ; CHECK-NEXT: [[X:%.*]] = call range(i8 1, 0) <2 x i8> @returns_i8_helper_vec() -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %x = call range(i8 1, 0) <2 x i8> @returns_i8_helper_vec() %or = or <2 x i8> %y, %x diff --git a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll index fba907ab731b0b..f272d9009aed13 100644 --- a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll @@ -91,10 +91,10 @@ define <2 x i1> @add_XY_or_bit0_is_one(<2 x i8> %x, <2 x i8> %C) nounwind { define <2 x i1> @sub_XY_and_bit0_is_zero_fail(<2 x i8> %x, <2 x i8> %C) nounwind { ; CHECK-LABEL: @sub_XY_and_bit0_is_zero_fail( -; CHECK-NEXT: [[C1:%.*]] = or <2 x i8> [[C:%.*]], +; CHECK-NEXT: [[C1:%.*]] = or <2 x i8> [[C:%.*]], splat (i8 8) ; CHECK-NEXT: [[Y:%.*]] = sub <2 x i8> [[X:%.*]], [[C1]] ; CHECK-NEXT: [[W:%.*]] = and <2 x i8> [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[W]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[W]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %C1 = or <2 x i8> %C, @@ -149,7 +149,7 @@ define <2 x i1> @sub_YX_xor_bit0_is_one_fail(<2 x i8> %x, <2 x i8> %C) nounwind ; CHECK-LABEL: @sub_YX_xor_bit0_is_one_fail( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> [[X:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i8> [[X]], [[TMP1]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP2]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP2]], splat (i8 -13) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %C1 = sub <2 x i8> %C, @@ -191,10 +191,10 @@ define i1 @add_YX_xor_bit0_is_one_fail(i8 %x, i8 %C) nounwind { define <2 x i1> @add_XY_or_bit0_is_one_fail(<2 x i8> %x, <2 x i8> %C) nounwind { ; CHECK-LABEL: @add_XY_or_bit0_is_one_fail( -; CHECK-NEXT: [[C1:%.*]] = add <2 x i8> [[C:%.*]], +; CHECK-NEXT: [[C1:%.*]] = add <2 x i8> [[C:%.*]], splat (i8 1) ; CHECK-NEXT: [[Y:%.*]] = add <2 x i8> [[C1]], [[X:%.*]] ; CHECK-NEXT: [[W:%.*]] = or <2 x i8> [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[W]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[W]], splat (i8 90) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %C1 = add <2 x i8> %C, @@ -219,7 +219,7 @@ define <2 x i32> @add_and_eval_vec(<2 x i32> %x, <2 x i32> %C) { define <2 x i32> @add_xor_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @add_xor_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %y = add <2 x i32> %x, %z = xor <2 x i32> %y, %x @@ -229,7 +229,7 @@ define <2 x i32> @add_xor_eval_vec(<2 x i32> %x) { define <2 x i32> @add_or_eval_vec(<2 x i32> %x, <2 x i32> %C) { ; CHECK-LABEL: @add_or_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %y = add <2 x i32> %x, %z = or <2 x i32> %y, %x diff --git a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll index 407100dec1201e..663de281f19ba4 100644 --- a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll @@ -16,7 +16,7 @@ define i1 @blsmsk_eq_is_false(i32 %x) { define <2 x i1> @blsmsk_ne_is_true_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_ne_is_true_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %x1 = or <2 x i32> %x, %x2 = sub <2 x i32> %x1, @@ -27,7 +27,7 @@ define <2 x i1> @blsmsk_ne_is_true_vec(<2 x i32> %x) { define <2 x i1> @blsmsk_ne_is_true_diff_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_ne_is_true_diff_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %x1 = or <2 x i32> %x, %x2 = sub <2 x i32> %x1, @@ -82,7 +82,7 @@ define i32 @blsmsk_add_eval(i32 %x) { define <2 x i32> @blsmsk_add_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_add_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 33) ; %x1 = or <2 x i32> %x, %x2 = add <2 x i32> %x1, @@ -115,7 +115,7 @@ define i32 @blsmsk_or_eval(i32 %x) { define <2 x i32> @blsmsk_or_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_or_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 33) ; %x1 = or <2 x i32> %x, %x2 = add <2 x i32> %x1, @@ -174,8 +174,8 @@ define i32 @blsmsk_and_eval2(i32 %x) { define <2 x i32> @blsmsk_and_eval3_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_and_eval3_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X1]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 34) +; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X1]], splat (i32 63) ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] ; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X3]], ; CHECK-NEXT: ret <2 x i32> [[Z]] @@ -242,9 +242,9 @@ define <2 x i32> @blsmsk_add_eval_assume_vec(<2 x i32> %x) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP0]]) ; CHECK-NEXT: [[CMP1:%.*]] = extractelement <2 x i1> [[CMP]], i64 1 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]]) -; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X]], +; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %lb = and <2 x i32> %x, @@ -335,10 +335,10 @@ define i1 @blsi_ne_is_true(i32 %x) { define <2 x i1> @blsi_ge_is_false_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_ge_is_false_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 10) ; CHECK-NEXT: [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], splat (i32 7) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x1 = or <2 x i32> %x, @@ -353,7 +353,7 @@ define <2 x i1> @blsi_ge_is_false_diff_vec(<2 x i32> %x) { ; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], splat (i32 7) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x1 = or <2 x i32> %x, @@ -399,7 +399,7 @@ define i32 @blsi_sub_eval(i32 %x) { define <2 x i32> @blsi_sub_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_sub_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 -31) ; %x1 = or <2 x i32> %x, %x2 = sub <2 x i32> , %x1 @@ -421,7 +421,7 @@ define i32 @blsi_or_eval(i32 %x) { define <2 x i32> @blsi_xor_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_xor_eval_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 33) ; %x1 = or <2 x i32> %x, %x2 = sub <2 x i32> , %x1 @@ -443,10 +443,10 @@ define i32 @blsi_and_eval(i32 %x) { define <2 x i32> @blsi_and_eval2_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_and_eval2_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, @@ -473,7 +473,7 @@ define i32 @blsi_and_eval3(i32 %x) { define <2 x i1> @blsi_eq_is_false_assume_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_eq_is_false_assume_vec( -; CHECK-NEXT: [[LB:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[LB:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[LB]], zeroinitializer ; CHECK-NEXT: [[CMP0:%.*]] = extractelement <2 x i1> [[CMP]], i64 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP0]]) @@ -481,7 +481,7 @@ define <2 x i1> @blsi_eq_is_false_assume_vec(<2 x i32> %x) { ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP1]]) ; CHECK-NEXT: [[X2:%.*]] = sub <2 x i32> zeroinitializer, [[X]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i32> [[X3]], splat (i32 8) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %lb = and <2 x i32> %x, @@ -533,7 +533,7 @@ define <2 x i1> @blsi_cmp_eq_diff_bits_vec(<2 x i32> %x) { ; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[X2:%.*]] = sub <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x1 = or <2 x i32> %x, @@ -577,10 +577,10 @@ define i32 @blsi_and_eval_assume(i32 %x) { define <2 x i1> @blsmsk_ne_no_proof_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_ne_no_proof_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 10) ; CHECK-NEXT: [[X2:%.*]] = add nsw <2 x i32> [[X1]], ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = icmp ne <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = icmp ne <2 x i32> [[X3]], splat (i32 8) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x1 = or <2 x i32> %x, @@ -592,10 +592,10 @@ define <2 x i1> @blsmsk_ne_no_proof_vec(<2 x i32> %x) { define <2 x i32> @blsmsk_add_noeval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_add_noeval_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 9) ; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X1]], ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, @@ -649,10 +649,10 @@ define i32 @blsmsk_add_no_eval2(i32 %x) { define <2 x i32> @blsmsk_xor_no_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_xor_no_eval_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 34) ; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[X1]], ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = xor <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = xor <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, @@ -733,10 +733,10 @@ define i32 @blsi_or_no_eval(i32 %x) { define <2 x i32> @blsi_or_no_partial_eval_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsi_or_no_partial_eval_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: [[X2:%.*]] = sub nsw <2 x i32> , [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], splat (i32 32) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, diff --git a/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll b/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll index e2fe873d715cd6..0d452829ae7578 100644 --- a/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll @@ -125,7 +125,7 @@ define <8 x i1> @hsub_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) { ; CHECK-LABEL: define <8 x i1> @hsub_and_eq_v8i16_sat( ; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %or1 = or <8 x i16> %x, @@ -171,7 +171,7 @@ define <16 x i1> @hsub_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) { ; CHECK-LABEL: define <16 x i1> @hsub_and_eq_v16i16_sat( ; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %or1 = or <16 x i16> %x, @@ -187,7 +187,7 @@ define <4 x i1> @hadd_shuffle_2st_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: define <4 x i1> @hadd_shuffle_2st_v4i32( ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %and1 = and <4 x i32> %x, @@ -202,7 +202,7 @@ define <4 x i1> @hadd_shuffle_4th_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_v4i32( ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %and1 = and <4 x i32> %x, @@ -218,10 +218,10 @@ define <4 x i1> @hadd_shuffle_2st_negative_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[X]], -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[Y]], splat (i32 3) ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], +; CHECK-NEXT: [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[RET]] ; entry: @@ -237,11 +237,11 @@ define <4 x i1> @hadd_shuffle_4th_negative_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_negative_v4i32( ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[X]], +; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[X]], splat (i32 3) ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[Y]], ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], +; CHECK-NEXT: [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[RET]] ; entry: diff --git a/llvm/test/Analysis/ValueTracking/knownzero-shift.ll b/llvm/test/Analysis/ValueTracking/knownzero-shift.ll index d4ed849c231f8f..3ce590c3a51ba0 100644 --- a/llvm/test/Analysis/ValueTracking/knownzero-shift.ll +++ b/llvm/test/Analysis/ValueTracking/knownzero-shift.ll @@ -32,8 +32,8 @@ define i32 @shl_shl(i32 %A) { define <2 x i33> @shl_shl_splat_vec(<2 x i33> %A) { ; CHECK-LABEL: @shl_shl_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]], +; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> [[A:%.*]], splat (i33 5) +; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]], splat (i33 28) ; CHECK-NEXT: ret <2 x i33> [[C]] ; %B = shl <2 x i33> %A, @@ -67,8 +67,8 @@ define i232 @lshr_lshr(i232 %A) { define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) { ; CHECK-LABEL: @lshr_lshr_splat_vec( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 28) +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 4) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = lshr <2 x i32> %A, diff --git a/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll b/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll index e86d28ebbc1d26..5224d75a157d5b 100644 --- a/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll +++ b/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll @@ -124,16 +124,16 @@ define void @numsignbits_shl_zext_all_bits_shifted_out(i8 %x) { define void @numsignbits_shl_zext_vector(<2 x i8> %x) { ; CHECK-LABEL: define void @numsignbits_shl_zext_vector( ; CHECK-SAME: <2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i8> [[X]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i8> [[X]], splat (i8 5) ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[ASHR]] to <2 x i16> -; CHECK-NEXT: [[NSB4:%.*]] = shl <2 x i16> [[ZEXT]], -; CHECK-NEXT: [[ADD14:%.*]] = and <2 x i16> [[NSB4]], +; CHECK-NEXT: [[NSB4:%.*]] = shl <2 x i16> [[ZEXT]], splat (i16 10) +; CHECK-NEXT: [[ADD14:%.*]] = and <2 x i16> [[NSB4]], splat (i16 15360) ; CHECK-NEXT: call void @escape2(<2 x i16> [[ADD14]]) -; CHECK-NEXT: [[ADD13:%.*]] = and <2 x i16> [[NSB4]], +; CHECK-NEXT: [[ADD13:%.*]] = and <2 x i16> [[NSB4]], splat (i16 7168) ; CHECK-NEXT: call void @escape2(<2 x i16> [[ADD13]]) -; CHECK-NEXT: [[ADD12:%.*]] = and <2 x i16> [[NSB4]], +; CHECK-NEXT: [[ADD12:%.*]] = and <2 x i16> [[NSB4]], splat (i16 3072) ; CHECK-NEXT: call void @escape2(<2 x i16> [[ADD12]]) -; CHECK-NEXT: [[AND11:%.*]] = and <2 x i16> [[NSB4]], +; CHECK-NEXT: [[AND11:%.*]] = and <2 x i16> [[NSB4]], splat (i16 2048) ; CHECK-NEXT: [[ADD11:%.*]] = add nsw <2 x i16> [[AND11]], [[NSB4]] ; CHECK-NEXT: call void @escape2(<2 x i16> [[ADD11]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Assembler/ConstantExprFold.ll b/llvm/test/Assembler/ConstantExprFold.ll index 44a7511ea568b0..944e6f0236ecbf 100644 --- a/llvm/test/Assembler/ConstantExprFold.ll +++ b/llvm/test/Assembler/ConstantExprFold.ll @@ -41,8 +41,8 @@ ; CHECK: @gep2 = global <2 x ptr> undef ; CHECK: @gep3 = global <2 x ptr> zeroinitializer ; CHECK: @gep4 = global <2 x ptr> zeroinitializer -; CHECK: @bitcast1 = global <2 x i32> -; CHECK: @bitcast2 = global <4 x i16> +; CHECK: @bitcast1 = global <2 x i32> splat (i32 -1) +; CHECK: @bitcast2 = global <4 x i16> splat (i16 -1) ;. define void @dummy() { ; CHECK-LABEL: @dummy( diff --git a/llvm/test/Assembler/constant-splat.ll b/llvm/test/Assembler/constant-splat.ll index f3ec0c8340aa15..1c2831058b8870 100644 --- a/llvm/test/Assembler/constant-splat.ll +++ b/llvm/test/Assembler/constant-splat.ll @@ -6,47 +6,47 @@ @my_global = external global i32 -; CHECK: @constant.splat.i1 = constant <1 x i1> +; CHECK: @constant.splat.i1 = constant <1 x i1> splat (i1 true) @constant.splat.i1 = constant <1 x i1> splat (i1 true) -; CHECK: @constant.splat.i32 = constant <5 x i32> +; CHECK: @constant.splat.i32 = constant <5 x i32> splat (i32 7) @constant.splat.i32 = constant <5 x i32> splat (i32 7) -; CHECK: @constant.splat.i128 = constant <2 x i128> +; CHECK: @constant.splat.i128 = constant <2 x i128> splat (i128 85070591730234615870450834276742070272) @constant.splat.i128 = constant <2 x i128> splat (i128 85070591730234615870450834276742070272) -; CHECK: @constant.splat.f16 = constant <4 x half> +; CHECK: @constant.splat.f16 = constant <4 x half> splat (half 0xHBC00) @constant.splat.f16 = constant <4 x half> splat (half 0xHBC00) -; CHECK: @constant.splat.f32 = constant <5 x float> +; CHECK: @constant.splat.f32 = constant <5 x float> splat (float -2.000000e+00) @constant.splat.f32 = constant <5 x float> splat (float -2.000000e+00) -; CHECK: @constant.splat.f64 = constant <3 x double> +; CHECK: @constant.splat.f64 = constant <3 x double> splat (double -3.000000e+00) @constant.splat.f64 = constant <3 x double> splat (double -3.000000e+00) -; CHECK: @constant.splat.128 = constant <2 x fp128> +; CHECK: @constant.splat.128 = constant <2 x fp128> splat (fp128 0xL00000000000000018000000000000000) @constant.splat.128 = constant <2 x fp128> splat (fp128 0xL00000000000000018000000000000000) -; CHECK: @constant.splat.bf16 = constant <4 x bfloat> +; CHECK: @constant.splat.bf16 = constant <4 x bfloat> splat (bfloat 0xRC0A0) @constant.splat.bf16 = constant <4 x bfloat> splat (bfloat 0xRC0A0) -; CHECK: @constant.splat.x86_fp80 = constant <3 x x86_fp80> +; CHECK: @constant.splat.x86_fp80 = constant <3 x x86_fp80> splat (x86_fp80 0xK4000C8F5C28F5C28F800) @constant.splat.x86_fp80 = constant <3 x x86_fp80> splat (x86_fp80 0xK4000C8F5C28F5C28F800) -; CHECK: @constant.splat.ppc_fp128 = constant <1 x ppc_fp128> +; CHECK: @constant.splat.ppc_fp128 = constant <1 x ppc_fp128> splat (ppc_fp128 0xM80000000000000000000000000000000) @constant.splat.ppc_fp128 = constant <1 x ppc_fp128> splat (ppc_fp128 0xM80000000000000000000000000000000) ; CHECK: @constant.splat.global.ptr = constant <4 x ptr> @constant.splat.global.ptr = constant <4 x ptr> splat (ptr @my_global) define void @add_fixed_lenth_vector_splat_i32(<4 x i32> %a) { -; CHECK: %add = add <4 x i32> %a, +; CHECK: %add = add <4 x i32> %a, splat (i32 137) %add = add <4 x i32> %a, splat (i32 137) ret void } define <4 x i32> @ret_fixed_lenth_vector_splat_i32() { -; CHECK: ret <4 x i32> +; CHECK: ret <4 x i32> splat (i32 56) ret <4 x i32> splat (i32 56) } diff --git a/llvm/test/Assembler/opaque-ptr.ll b/llvm/test/Assembler/opaque-ptr.ll index 236a64904bc957..287b81d2d1c2f1 100644 --- a/llvm/test/Assembler/opaque-ptr.ll +++ b/llvm/test/Assembler/opaque-ptr.ll @@ -92,7 +92,7 @@ define <2 x ptr> @gep_constexpr_vec1(ptr %a) { } ; CHECK: define <2 x ptr> @gep_constexpr_vec2(<2 x ptr> %a) -; CHECK: ret <2 x ptr> getelementptr (i16, <2 x ptr> zeroinitializer, <2 x i32> ) +; CHECK: ret <2 x ptr> getelementptr (i16, <2 x ptr> zeroinitializer, <2 x i32> splat (i32 3)) define <2 x ptr> @gep_constexpr_vec2(<2 x ptr> %a) { ret <2 x ptr> getelementptr (i16, <2 x ptr> zeroinitializer, i32 3) } diff --git a/llvm/test/Bitcode/constantsTest.3.2.ll b/llvm/test/Bitcode/constantsTest.3.2.ll index f20b1b20c1544b..0b04686bab1d2e 100644 --- a/llvm/test/Bitcode/constantsTest.3.2.ll +++ b/llvm/test/Bitcode/constantsTest.3.2.ll @@ -47,7 +47,7 @@ entry: %res2 = extractvalue [2 x i32] [i32 1, i32 2], 0 ;const vector -; CHECK-NEXT: %res3 = add <2 x i32> , +; CHECK-NEXT: %res3 = add <2 x i32> splat (i32 1) %res3 = add <2 x i32> , ;zeroinitializer @@ -109,11 +109,11 @@ entry: icmp eq i32 1, 0 ; CHECK-NEXT: fcmp oeq float 1.000000e+00, 0.000000e+00 fcmp oeq float 1.0, 0.0 - ; CHECK-NEXT: extractelement <2 x i32> , i32 1 + ; CHECK-NEXT: extractelement <2 x i32> splat (i32 1) extractelement <2 x i32> , i32 1 - ; CHECK-NEXT: insertelement <2 x i32> , i32 0, i32 1 + ; CHECK-NEXT: insertelement <2 x i32> splat (i32 1), i32 0, i32 1 insertelement <2 x i32> , i32 0, i32 1 - ; CHECK-NEXT: shufflevector <2 x i32> , <2 x i32> zeroinitializer, <4 x i32> + ; CHECK-NEXT: shufflevector <2 x i32> splat (i32 1), <2 x i32> zeroinitializer, <4 x i32> shufflevector <2 x i32> , <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: extractvalue { i32, float } { i32 1, float 2.000000e+00 }, 0 extractvalue { i32, float } { i32 1, float 2.0 }, 0 @@ -121,4 +121,4 @@ entry: insertvalue { i32, float } { i32 1, float 2.0 }, i32 0, 0 ret void -} \ No newline at end of file +} diff --git a/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll b/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll index 840ed6a3c40d68..7b4a94a41f7d84 100644 --- a/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll +++ b/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll @@ -517,7 +517,7 @@ declare void @dummy(i64, i64, i64) define void @vectorPromotion() { ; OPTALL-LABEL: define void @vectorPromotion() { ; OPTALL-NEXT: [[ENTRY:.*:]] -; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, +; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, splat (i32 8) ; OPTALL-NEXT: [[B:%.*]] = zext <2 x i32> [[A]] to <2 x i64> ; OPTALL-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll index b69afa3ab1f3d6..7aaf599583c801 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll @@ -1357,17 +1357,17 @@ define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x fl define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) { ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath( ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> , [[X]], !fpmath [[META1:![0-9]+]] +; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META1:![0-9]+]] ; CHECK-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> , [[X]] +; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> , [[X]] +; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> , [[X]], !fpmath [[META0]] +; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> , [[X]], !fpmath [[META0]] +; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: ret void ; @@ -2114,7 +2114,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { ; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> , [[SQRT_X_NO_MD]] +; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 @@ -2185,7 +2185,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { ; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> , [[SQRT_X_NO_MD]] +; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 @@ -2256,7 +2256,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { ; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> , [[SQRT_X_NO_MD]] +; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] ; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2:![0-9]+]] ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll index b8585120afa45f..e13f0fba2bdb7e 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll @@ -2144,7 +2144,7 @@ define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) { ; VI-LABEL: @bitreverse_3xi15( ; VI-NEXT: [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32> ; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]]) -; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], +; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], splat (i32 17) ; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15> ; VI-NEXT: store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8 ; VI-NEXT: ret void @@ -2842,7 +2842,7 @@ define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) { ; VI-LABEL: @bitreverse_3xi16( ; VI-NEXT: [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32> ; VI-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]]) -; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], +; VI-NEXT: [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], splat (i32 16) ; VI-NEXT: [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16> ; VI-NEXT: store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8 ; VI-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index e4756ad3817c2e..d88719502d88fd 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -5594,7 +5594,7 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out, < define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @udiv_v2i32_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> splat (i32 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float @@ -5938,7 +5938,7 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3 define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @urem_v2i32_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> splat (i32 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float @@ -6393,7 +6393,7 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out, define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @sdiv_v2i32_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> splat (i32 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], 31 @@ -6852,7 +6852,7 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3 define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @srem_v2i32_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> splat (i32 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], 31 @@ -7334,7 +7334,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, < define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @udiv_v2i64_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> splat (i64 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], [[TMP2]] @@ -7592,7 +7592,7 @@ define amdgpu_kernel void @urem_v2i64_pow2k_denom(ptr addrspace(1) %out, <2 x i6 define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @urem_v2i64_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> splat (i64 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = urem i64 [[TMP1]], [[TMP2]] @@ -8261,7 +8261,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @sdiv_v2i64_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> splat (i64 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = sdiv i64 [[TMP1]], [[TMP2]] @@ -9341,7 +9341,7 @@ define amdgpu_kernel void @srem_v2i64_pow2k_denom(ptr addrspace(1) %out, <2 x i6 define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @srem_v2i64_pow2_shl_denom( -; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] +; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> splat (i64 4096), [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = srem i64 [[TMP1]], [[TMP2]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll index d938c16bf6134c..296b817bc8f751 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll @@ -69,13 +69,13 @@ define <2 x i8> @mul_v1i16(<1 x i16> %arg) { ; ; VI-LABEL: @mul_v1i16( ; VI-NEXT: BB: -; VI-NEXT: [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], +; VI-NEXT: [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], splat (i16 42) ; VI-NEXT: [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8> ; VI-NEXT: ret <2 x i8> [[CAST]] ; ; DISABLED-LABEL: @mul_v1i16( ; DISABLED-NEXT: BB: -; DISABLED-NEXT: [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], +; DISABLED-NEXT: [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], splat (i16 42) ; DISABLED-NEXT: [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8> ; DISABLED-NEXT: ret <2 x i8> [[CAST]] ; @@ -97,12 +97,12 @@ define <1 x i8> @mul_v1i8(<1 x i8> %arg) { ; ; VI-LABEL: @mul_v1i8( ; VI-NEXT: BB: -; VI-NEXT: [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], +; VI-NEXT: [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], splat (i8 42) ; VI-NEXT: ret <1 x i8> [[MUL]] ; ; DISABLED-LABEL: @mul_v1i8( ; DISABLED-NEXT: BB: -; DISABLED-NEXT: [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], +; DISABLED-NEXT: [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], splat (i8 42) ; DISABLED-NEXT: ret <1 x i8> [[MUL]] ; BB: @@ -112,10 +112,10 @@ BB: define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; SI-LABEL: @smul24_v2i32( -; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], -; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], -; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], +; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], splat (i32 8) +; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], splat (i32 8) +; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], splat (i32 8) +; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], splat (i32 8) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0 @@ -127,10 +127,10 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; SI-NEXT: ret <2 x i32> [[MUL]] ; ; VI-LABEL: @smul24_v2i32( -; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], -; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], -; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], +; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], splat (i32 8) +; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], splat (i32 8) +; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], splat (i32 8) +; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], splat (i32 8) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0 @@ -142,10 +142,10 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; VI-NEXT: ret <2 x i32> [[MUL]] ; ; DISABLED-LABEL: @smul24_v2i32( -; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], -; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], -; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], +; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], splat (i32 8) +; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], splat (i32 8) +; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], splat (i32 8) +; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], splat (i32 8) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i32> [[MUL]] ; @@ -184,8 +184,8 @@ define i32 @umul24_i32(i32 %lhs, i32 %rhs) { define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; SI-LABEL: @umul24_v2i32( -; SI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], +; SI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], splat (i32 16777215) +; SI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], splat (i32 16777215) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0 @@ -197,8 +197,8 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; SI-NEXT: ret <2 x i32> [[MUL]] ; ; VI-LABEL: @umul24_v2i32( -; VI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], +; VI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], splat (i32 16777215) +; VI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], splat (i32 16777215) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0 @@ -210,8 +210,8 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; VI-NEXT: ret <2 x i32> [[MUL]] ; ; DISABLED-LABEL: @umul24_v2i32( -; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], +; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], splat (i32 16777215) +; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], splat (i32 16777215) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i32> [[MUL]] ; @@ -495,8 +495,8 @@ define i31 @umul24_i31(i31 %lhs, i31 %rhs) { define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; SI-LABEL: @umul24_v2i31( -; SI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], +; SI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], splat (i31 16777215) +; SI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], splat (i31 16777215) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0 @@ -514,8 +514,8 @@ define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; SI-NEXT: ret <2 x i31> [[MUL]] ; ; VI-LABEL: @umul24_v2i31( -; VI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], +; VI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], splat (i31 16777215) +; VI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], splat (i31 16777215) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0 @@ -533,8 +533,8 @@ define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; VI-NEXT: ret <2 x i31> [[MUL]] ; ; DISABLED-LABEL: @umul24_v2i31( -; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], +; DISABLED-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], splat (i31 16777215) +; DISABLED-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], splat (i31 16777215) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i31> [[MUL]] ; @@ -546,10 +546,10 @@ define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; SI-LABEL: @smul24_v2i31( -; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], -; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], -; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], +; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], splat (i31 8) +; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], splat (i31 8) +; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], splat (i31 8) +; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], splat (i31 8) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0 @@ -567,10 +567,10 @@ define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; SI-NEXT: ret <2 x i31> [[MUL]] ; ; VI-LABEL: @smul24_v2i31( -; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], -; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], -; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], +; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], splat (i31 8) +; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], splat (i31 8) +; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], splat (i31 8) +; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], splat (i31 8) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0 @@ -588,10 +588,10 @@ define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) { ; VI-NEXT: ret <2 x i31> [[MUL]] ; ; DISABLED-LABEL: @smul24_v2i31( -; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], -; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], -; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], +; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], splat (i31 8) +; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], splat (i31 8) +; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], splat (i31 8) +; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], splat (i31 8) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i31> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i31> [[MUL]] ; @@ -733,10 +733,10 @@ define i32 @umul25_i32(i32 %lhs, i32 %rhs) { define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) { ; SI-LABEL: @smul24_v2i33( -; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], -; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], -; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], -; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], +; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], splat (i33 9) +; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], splat (i33 9) +; SI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], splat (i33 9) +; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], splat (i33 9) ; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0 ; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1 ; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0 @@ -754,10 +754,10 @@ define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) { ; SI-NEXT: ret <2 x i33> [[MUL]] ; ; VI-LABEL: @smul24_v2i33( -; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], -; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], -; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], -; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], +; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], splat (i33 9) +; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], splat (i33 9) +; VI-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], splat (i33 9) +; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], splat (i33 9) ; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0 ; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1 ; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0 @@ -775,10 +775,10 @@ define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) { ; VI-NEXT: ret <2 x i33> [[MUL]] ; ; DISABLED-LABEL: @smul24_v2i33( -; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], -; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], -; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], -; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], +; DISABLED-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], splat (i33 9) +; DISABLED-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], splat (i33 9) +; DISABLED-NEXT: [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], splat (i33 9) +; DISABLED-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], splat (i33 9) ; DISABLED-NEXT: [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]] ; DISABLED-NEXT: ret <2 x i33> [[MUL]] ; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll index a5bea7cd738f1d..6840c75644b977 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll @@ -100,7 +100,7 @@ define void @broken_phi() { ; GFX9-NEXT: bb: ; GFX9-NEXT: br label [[BB1:%.*]] ; GFX9: bb1: -; GFX9-NEXT: [[I:%.*]] = phi <4 x i8> [ , [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] +; GFX9-NEXT: [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] ; GFX9-NEXT: br i1 false, label [[BB3:%.*]], label [[BB2:%.*]] ; GFX9: bb2: ; GFX9-NEXT: br label [[BB3]] @@ -118,7 +118,7 @@ define void @broken_phi() { ; GFX12-NEXT: bb: ; GFX12-NEXT: br label [[BB1:%.*]] ; GFX12: bb1: -; GFX12-NEXT: [[I:%.*]] = phi <4 x i8> [ , [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] +; GFX12-NEXT: [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] ; GFX12-NEXT: br i1 false, label [[BB3:%.*]], label [[BB2:%.*]] ; GFX12: bb2: ; GFX12-NEXT: br label [[BB3]] diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll index acdab29e85b91a..b494ff8ba1f5dd 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll @@ -541,7 +541,7 @@ define float @test_pow_afn_f32_neg0.0(float %x) { define <2 x float> @test_pow_afn_v2f32_0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %pow @@ -550,7 +550,7 @@ define <2 x float> @test_pow_afn_v2f32_0.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %pow @@ -559,7 +559,7 @@ define <2 x float> @test_pow_afn_v2f32_neg0.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %pow @@ -568,7 +568,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) { define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow @@ -577,7 +577,7 @@ define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x floa define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %pow @@ -684,7 +684,7 @@ define <2 x float> @test_pow_afn_v2f32_1.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg1.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg1.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__POWRECIP]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -713,7 +713,7 @@ define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x floa define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) @@ -753,7 +753,7 @@ define <2 x float> @test_pow_afn_v2f32_2.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg2.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg2.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -793,7 +793,7 @@ define float @test_pow_afn_f32_neg3.0(float %x) { define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -803,7 +803,7 @@ define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg3.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -843,7 +843,7 @@ define float @test_pow_afn_f32_neg3.99(float %x) { define <2 x float> @test_pow_afn_v2f32_3.99(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.99 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0x400FEB8520000000)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -853,7 +853,7 @@ define <2 x float> @test_pow_afn_v2f32_3.99(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg3.99(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.99 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0xC00FEB8520000000)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -893,7 +893,7 @@ define float @test_pow_afn_f32_neg8.0(float %x) { define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_8.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 8)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -903,7 +903,7 @@ define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg8.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg8.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -8)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -943,7 +943,7 @@ define float @test_pow_afn_f32_neg12.0(float %x) { define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_12.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 12)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -953,7 +953,7 @@ define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg12.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg12.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -12)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -993,7 +993,7 @@ define float @test_pow_afn_f32_neg13.0(float %x) { define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 13)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1003,7 +1003,7 @@ define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_neg13.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg13.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -13)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1432,7 +1432,7 @@ define float @test_pow_f32__y_n_2_5(float %x) { define <2 x float> @test_pow_v2f32__y_0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> zeroinitializer) ret <2 x float> %pow @@ -1441,7 +1441,7 @@ define <2 x float> @test_pow_v2f32__y_0(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %pow @@ -1459,7 +1459,7 @@ define <2 x float> @test_pow_v2f32__y_1(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n1(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n1 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__POWRECIP]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1479,7 +1479,7 @@ define <2 x float> @test_pow_v2f32__y_2(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n2(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n2 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1509,7 +1509,7 @@ define <2 x float> @test_pow_v2f32__y_neg_half(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_3 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1519,7 +1519,7 @@ define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n3(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n3 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1529,7 +1529,7 @@ define <2 x float> @test_pow_v2f32__y_n3(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_2_5(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_2_5 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 2.500000e+00)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1539,7 +1539,7 @@ define <2 x float> @test_pow_v2f32__y_2_5(<2 x float> %x) { define <2 x float> @test_pow_v2f32__y_n_2_5(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n_2_5 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -2.500000e+00)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -1814,7 +1814,7 @@ define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4(<2 x float> %x) { define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 4.500000e+00)) ; CHECK-NEXT: ret <2 x float> [[POW]] ; %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) @@ -2003,7 +2003,7 @@ define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4(<2 x double> %x) { define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5 ; CHECK-SAME: (<2 x double> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> ) +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> splat (double 4.500000e+00)) ; CHECK-NEXT: ret <2 x double> [[POW]] ; %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> ) @@ -2157,7 +2157,7 @@ define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4(<2 x half> %x) { define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5 ; CHECK-SAME: (<2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> ) +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> splat (half 0xH4480)) ; CHECK-NEXT: ret <2 x half> [[POW]] ; %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> ) @@ -2395,7 +2395,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa ; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32> @@ -2444,7 +2444,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa ; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll index bd4b86f0387666..f9c359bc114ed3 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll @@ -277,7 +277,7 @@ define <2 x float> @test_pown_v2f32__y_0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; entry: %call = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> zeroinitializer) @@ -288,7 +288,7 @@ define <2 x float> @test_pown_v2f32__y_0_undef(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_0_undef ; CHECK-SAME: (<2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; entry: %call = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> ) @@ -299,7 +299,7 @@ define <3 x float> @test_pown_v3f32__y_0(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_0 ; CHECK-SAME: (<3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; entry: %call = tail call <3 x float> @_Z4pownDv3_fDv3_i(<3 x float> %x, <3 x i32> zeroinitializer) @@ -310,7 +310,7 @@ define <4 x float> @test_pown_v4f32__y_0(<4 x float> %x) { ; CHECK-LABEL: define <4 x float> @test_pown_v4f32__y_0 ; CHECK-SAME: (<4 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x float> +; CHECK-NEXT: ret <4 x float> splat (float 1.000000e+00) ; entry: %call = tail call <4 x float> @_Z4pownDv4_fDv4_i(<4 x float> %x, <4 x i32> zeroinitializer) @@ -321,7 +321,7 @@ define <8 x float> @test_pown_v8f32__y_0(<8 x float> %x) { ; CHECK-LABEL: define <8 x float> @test_pown_v8f32__y_0 ; CHECK-SAME: (<8 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x float> +; CHECK-NEXT: ret <8 x float> splat (float 1.000000e+00) ; entry: %call = tail call <8 x float> @_Z4pownDv8_fDv8_i(<8 x float> %x, <8 x i32> zeroinitializer) @@ -332,7 +332,7 @@ define <16 x float> @test_pown_v16f32__y_0(<16 x float> %x) { ; CHECK-LABEL: define <16 x float> @test_pown_v16f32__y_0 ; CHECK-SAME: (<16 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x float> +; CHECK-NEXT: ret <16 x float> splat (float 1.000000e+00) ; entry: %call = tail call <16 x float> @_Z4pownDv16_fDv16_i(<16 x float> %x, <16 x i32> zeroinitializer) @@ -527,7 +527,7 @@ define <2 x float> @test_pown_v2f32__y_neg1(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_neg1 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__POWRECIP]] ; entry: @@ -539,7 +539,7 @@ define <3 x float> @test_pown_v3f32__y_neg1(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_neg1 ; CHECK-SAME: (<3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <3 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; entry: @@ -551,7 +551,7 @@ define <3 x float> @test_pown_v3f32__y_neg1_undef(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_neg1_undef ; CHECK-SAME: (<3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <3 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; entry: @@ -563,7 +563,7 @@ define <4 x float> @test_pown_v4f32__y_neg1(<4 x float> %x) { ; CHECK-LABEL: define <4 x float> @test_pown_v4f32__y_neg1 ; CHECK-SAME: (<4 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <4 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <4 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <4 x float> [[__POWRECIP]] ; entry: @@ -575,7 +575,7 @@ define <8 x float> @test_pown_v8f32__y_neg1(<8 x float> %x) { ; CHECK-LABEL: define <8 x float> @test_pown_v8f32__y_neg1 ; CHECK-SAME: (<8 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <8 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <8 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <8 x float> [[__POWRECIP]] ; entry: @@ -587,7 +587,7 @@ define <16 x float> @test_pown_v16f32__y_neg1(<16 x float> %x) { ; CHECK-LABEL: define <16 x float> @test_pown_v16f32__y_neg1 ; CHECK-SAME: (<16 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <16 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <16 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <16 x float> [[__POWRECIP]] ; entry: @@ -698,7 +698,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y) ; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], splat (i32 31) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32> @@ -744,7 +744,7 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]]) ; CHECK-NEXT: [[__YTOU:%.*]] = zext <2 x i32> [[Y]] to <2 x i64> -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], splat (i64 63) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[X]] to <2 x i64> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i64> [[__YEVEN]], [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[__EXP2]] to <2 x i64> @@ -790,7 +790,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) { ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]]) ; CHECK-NEXT: [[__YTOU:%.*]] = trunc <2 x i32> [[Y]] to <2 x i16> -; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], splat (i16 15) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16> ; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16> @@ -1001,7 +1001,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32__y_neg3(<2 x float> %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]] ; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[__POWX2]] -; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> , [[__POWPROD]] +; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> splat (float 1.000000e+00), [[__POWPROD]] ; CHECK-NEXT: ret <2 x float> [[__1POWPROD]] ; entry: @@ -1015,7 +1015,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32__y_neg4(<2 x float> %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]] ; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x float> [[__POWX2]], [[__POWX2]] -; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> , [[__POWX21]] +; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> splat (float 1.000000e+00), [[__POWX21]] ; CHECK-NEXT: ret <2 x float> [[__1POWPROD]] ; entry: diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll index 1a92ca8960a776..c2b2c693e742ae 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll @@ -517,7 +517,7 @@ define float @test_powr_afn_f32_neg0.0(float %x) { define <2 x float> @test_powr_afn_v2f32_0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %powr @@ -526,7 +526,7 @@ define <2 x float> @test_powr_afn_v2f32_0.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg0.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg0.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) ret <2 x float> %powr @@ -545,7 +545,7 @@ define <2 x float> @test_powr_afn_v2f32_plus_minus_0.0(<2 x float> %x) { define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr @@ -554,7 +554,7 @@ define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x flo define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: ret <3 x float> +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) ret <3 x float> %powr @@ -661,7 +661,7 @@ define <2 x float> @test_powr_afn_v2f32_1.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg1.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg1.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__POWRECIP]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -690,7 +690,7 @@ define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x flo define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) { ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> , [[X]] +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] ; %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> ) @@ -730,7 +730,7 @@ define <2 x float> @test_powr_afn_v2f32_2.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg2.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg2.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -2.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -770,7 +770,7 @@ define float @test_powr_afn_f32_neg3.0(float %x) { define <2 x float> @test_powr_afn_v2f32_3.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_3.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 3.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -780,7 +780,7 @@ define <2 x float> @test_powr_afn_v2f32_3.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg3.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg3.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -3.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -820,7 +820,7 @@ define float @test_powr_afn_f32_neg3.99(float %x) { define <2 x float> @test_powr_afn_v2f32_3.99(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_3.99 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0x400FEB8520000000)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -830,7 +830,7 @@ define <2 x float> @test_powr_afn_v2f32_3.99(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg3.99(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg3.99 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0xC00FEB8520000000)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -870,7 +870,7 @@ define float @test_powr_afn_f32_neg8.0(float %x) { define <2 x float> @test_powr_afn_v2f32_8.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_8.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 8.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -880,7 +880,7 @@ define <2 x float> @test_powr_afn_v2f32_8.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg8.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg8.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -8.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -920,7 +920,7 @@ define float @test_powr_afn_f32_neg12.0(float %x) { define <2 x float> @test_powr_afn_v2f32_12.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_12.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 1.200000e+01)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -930,7 +930,7 @@ define <2 x float> @test_powr_afn_v2f32_12.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg12.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg12.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -1.200000e+01)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -970,7 +970,7 @@ define float @test_powr_afn_f32_neg13.0(float %x) { define <2 x float> @test_powr_afn_v2f32_13.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_13.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 1.300000e+01)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) @@ -980,7 +980,7 @@ define <2 x float> @test_powr_afn_v2f32_13.0(<2 x float> %x) { define <2 x float> @test_powr_afn_v2f32_neg13.0(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg13.0 ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -1.300000e+01)) ; CHECK-NEXT: ret <2 x float> [[POWR]] ; %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> ) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll index 7932f8d1fc5beb..a2d5ce2d658b57 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll @@ -362,7 +362,7 @@ define <2 x half> @test_rootn_v2f16_2(<2 x half> %x) { define <2 x half> @test_rootn_v2f16_neg1(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg1( ; CHECK-SAME: <2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <2 x half> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <2 x half> splat (half 0xH3C00), [[X]] ; CHECK-NEXT: ret <2 x half> [[__ROOTN2DIV]] ; %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> ) @@ -373,7 +373,7 @@ define <2 x half> @test_rootn_v2f16_neg2(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg2( ; CHECK-SAME: <2 x half> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x half> @llvm.sqrt.v2f16(<2 x half> [[X]]) -; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x half> , [[TMP1]], !fpmath [[META0]] +; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x half> splat (half 0xH3C00), [[TMP1]], !fpmath [[META0]] ; CHECK-NEXT: ret <2 x half> [[__ROOTN2RSQRT]] ; %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> ) @@ -383,7 +383,7 @@ define <2 x half> @test_rootn_v2f16_neg2(<2 x half> %x) { define <2 x half> @test_rootn_v2f16_neg3(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg3( ; CHECK-SAME: <2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> splat (i32 -3)) ; CHECK-NEXT: ret <2 x half> [[CALL]] ; %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> ) @@ -536,7 +536,7 @@ define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 { ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1__strictfp( ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) #[[ATTR0]] +; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 1)) #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: @@ -795,7 +795,7 @@ define <2 x float> @test_rootn_v2f32__y_neg1(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg1( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[__ROOTN2DIV]] ; entry: @@ -807,7 +807,7 @@ define <3 x float> @test_rootn_v3f32__y_neg1(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1( ; CHECK-SAME: <3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <3 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__ROOTN2DIV]] ; entry: @@ -819,7 +819,7 @@ define <3 x float> @test_rootn_v3f32__y_neg1_undef(<3 x float> %x) { ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1_undef( ; CHECK-SAME: <3 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <3 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <3 x float> [[__ROOTN2DIV]] ; entry: @@ -831,7 +831,7 @@ define <4 x float> @test_rootn_v4f32__y_neg1(<4 x float> %x) { ; CHECK-LABEL: define <4 x float> @test_rootn_v4f32__y_neg1( ; CHECK-SAME: <4 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <4 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <4 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <4 x float> [[__ROOTN2DIV]] ; entry: @@ -843,7 +843,7 @@ define <8 x float> @test_rootn_v8f32__y_neg1(<8 x float> %x) { ; CHECK-LABEL: define <8 x float> @test_rootn_v8f32__y_neg1( ; CHECK-SAME: <8 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <8 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <8 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <8 x float> [[__ROOTN2DIV]] ; entry: @@ -855,7 +855,7 @@ define <16 x float> @test_rootn_v16f32__y_neg1(<16 x float> %x) { ; CHECK-LABEL: define <16 x float> @test_rootn_v16f32__y_neg1( ; CHECK-SAME: <16 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <16 x float> , [[X]] +; CHECK-NEXT: [[__ROOTN2DIV:%.*]] = fdiv <16 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <16 x float> [[__ROOTN2DIV]] ; entry: @@ -930,7 +930,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2(<2 x float> %x) { ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x float> , [[TMP0]], !fpmath [[META0]] +; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[TMP0]], !fpmath [[META0]] ; CHECK-NEXT: ret <2 x float> [[__ROOTN2RSQRT]] ; entry: @@ -943,7 +943,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2__flags(<2 x float> %x) { ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) -; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv nnan nsz contract <2 x float> , [[TMP0]], !fpmath [[META0]] +; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = fdiv nnan nsz contract <2 x float> splat (float 1.000000e+00), [[TMP0]], !fpmath [[META0]] ; CHECK-NEXT: ret <2 x float> [[__ROOTN2RSQRT]] ; entry: @@ -955,7 +955,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(<2 x float> %x) #1 { ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2__strictfp( ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) #[[ATTR0]] +; CHECK-NEXT: [[__ROOTN2RSQRT:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2)) #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[__ROOTN2RSQRT]] ; entry: @@ -1277,7 +1277,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_4(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_4( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 4)) ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: @@ -1289,7 +1289,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg3(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg3( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3)) ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: @@ -1301,7 +1301,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg4(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg4( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -4)) ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: @@ -1313,7 +1313,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_5(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_5( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 5)) ; CHECK-NEXT: ret <2 x float> [[CALL]] ; entry: diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll index b212b9caf8400e..f6ee007facd7fd 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -422,7 +422,7 @@ define <2 x float> @basic_fract_v2f32_nonans(<2 x float> nofpclass(nan) %x) { ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> splat (float 0x3FEFFFFFE0000000)) ; GFX6-IR-NEXT: ret <2 x float> [[MIN]] ; ; IR-FRACT-LABEL: define <2 x float> @basic_fract_v2f32_nonans @@ -1244,7 +1244,7 @@ define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) { ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) ; GFX6-IR-NEXT: ret <2 x half> [[MIN]] ; ; GFX7-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan @@ -1252,7 +1252,7 @@ define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) { ; GFX7-IR-NEXT: entry: ; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) ; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] -; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> ) +; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) ; GFX7-IR-NEXT: ret <2 x half> [[MIN]] ; ; IR-LEGALF16-LABEL: define <2 x half> @basic_fract_v2f16_nonan @@ -1668,11 +1668,11 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> splat (float 0x3FEFFFFFE0000000)) ; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x float> [[X]], zeroinitializer ; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x float> [[X]], <2 x float> [[MIN]] ; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) -; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], +; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], splat (float 0x7FF0000000000000) ; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]] ; GFX6-IR-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; GFX6-IR-NEXT: ret <2 x float> [[COND6]] @@ -1688,7 +1688,7 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap ; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0 ; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1 ; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) -; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], +; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], splat (float 0x7FF0000000000000) ; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]] ; IR-FRACT-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; IR-FRACT-NEXT: ret <2 x float> [[COND6]] @@ -2021,11 +2021,11 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) ; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer ; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]] ; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) -; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], +; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00) ; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]] ; GFX6-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; GFX6-IR-NEXT: ret <2 x half> [[COND6]] @@ -2035,11 +2035,11 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX7-IR-NEXT: entry: ; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) ; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] -; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> ) +; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) ; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer ; GFX7-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]] ; GFX7-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) -; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], +; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00) ; GFX7-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]] ; GFX7-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; GFX7-IR-NEXT: ret <2 x half> [[COND6]] @@ -2055,7 +2055,7 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; IR-LEGALF16-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0 ; IR-LEGALF16-NEXT: [[COND:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1 ; IR-LEGALF16-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) -; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], +; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00) ; IR-LEGALF16-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]] ; IR-LEGALF16-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; IR-LEGALF16-NEXT: ret <2 x half> [[COND6]] @@ -2187,11 +2187,11 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc ; GFX6-IR-NEXT: entry: ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]]) ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x double> [[X]], [[FLOOR]] -; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SUB]], <2 x double> ) +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SUB]], <2 x double> splat (double 0x3FEFFFFFFFFFFFFF)) ; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x double> [[X]], zeroinitializer ; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x double> [[X]], <2 x double> [[MIN]] ; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]]) -; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], +; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], splat (double 0x7FF0000000000000) ; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]] ; GFX6-IR-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; GFX6-IR-NEXT: ret <2 x double> [[COND6]] @@ -2207,7 +2207,7 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc ; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i64 0 ; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP3]], i64 1 ; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]]) -; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], +; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], splat (double 0x7FF0000000000000) ; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]] ; IR-FRACT-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 ; IR-FRACT-NEXT: ret <2 x double> [[COND6]] diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll index 5ff08a5e8227fe..9b2e2f950a39d6 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll @@ -31,13 +31,13 @@ define void @vector_copy(ptr %a, ptr %b) { ; CHECK-LABEL: define void @vector_copy ; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[X:%.*]] = load <4 x i160>, ptr [[A]], align 128 -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i160> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i160> [[X]], splat (i160 32) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i160> [[TMP1]] to <4 x i128> ; CHECK-NEXT: [[X_PTR_RSRC:%.*]] = inttoptr <4 x i128> [[TMP2]] to <4 x ptr addrspace(8)> ; CHECK-NEXT: [[X_PTR_OFF:%.*]] = trunc <4 x i160> [[X]] to <4 x i32> ; CHECK-NEXT: [[B1:%.*]] = getelementptr <4 x i160>, ptr [[B]], i64 2 ; CHECK-NEXT: [[X_PTR_INT_RSRC:%.*]] = ptrtoint <4 x ptr addrspace(8)> [[X_PTR_RSRC]] to <4 x i160> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i160> [[X_PTR_INT_RSRC]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i160> [[X_PTR_INT_RSRC]], splat (i160 32) ; CHECK-NEXT: [[X_PTR_INT_OFF:%.*]] = zext <4 x i32> [[X_PTR_OFF]] to <4 x i160> ; CHECK-NEXT: [[X_PTR_INT:%.*]] = or <4 x i160> [[TMP3]], [[X_PTR_INT_OFF]] ; CHECK-NEXT: store <4 x i160> [[X_PTR_INT]], ptr [[B1]], align 128 diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll index cc98b5333c5bb8..3191abae34ac60 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll @@ -27,9 +27,9 @@ define <2 x ptr addrspace(7)> @gep_vectors(<2 x ptr addrspace(7)> %in, <2 x i32> ; CHECK-SAME: ({ <2 x ptr addrspace(8)>, <2 x i32> } [[IN:%.*]], <2 x i32> [[IDX:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[IN_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], 0 ; CHECK-NEXT: [[IN_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], 1 -; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[IDX]], -; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], -; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], +; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[IDX]], splat (i32 40) +; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], splat (i32 8) +; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], splat (i32 24) ; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]] ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1 @@ -47,9 +47,9 @@ define <2 x ptr addrspace(7)> @gep_vector_scalar(<2 x ptr addrspace(7)> %in, i64 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[IDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLAT_C:%.*]] = trunc <2 x i64> [[DOTSPLAT]] to <2 x i32> -; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[DOTSPLAT_C]], -; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], -; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], +; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[DOTSPLAT_C]], splat (i32 40) +; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], splat (i32 8) +; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], splat (i32 24) ; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]] ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1 @@ -153,7 +153,7 @@ define <2 x i160> @ptrtoint_vec(<2 x ptr addrspace(7)> %ptr) { ; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[PTR]], 0 ; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[PTR]], 1 ; CHECK-NEXT: [[RET_RSRC:%.*]] = ptrtoint <2 x ptr addrspace(8)> [[PTR_RSRC]] to <2 x i160> -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i160> [[RET_RSRC]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i160> [[RET_RSRC]], splat (i160 32) ; CHECK-NEXT: [[RET_OFF:%.*]] = zext <2 x i32> [[PTR_OFF]] to <2 x i160> ; CHECK-NEXT: [[RET:%.*]] = or <2 x i160> [[TMP1]], [[RET_OFF]] ; CHECK-NEXT: ret <2 x i160> [[RET]] @@ -221,7 +221,7 @@ define ptr addrspace(7) @inttoptr(i160 %v) { define <2 x ptr addrspace(7)> @inttoptr_vec(<2 x i160> %v) { ; CHECK-LABEL: define { <2 x ptr addrspace(8)>, <2 x i32> } @inttoptr_vec ; CHECK-SAME: (<2 x i160> [[V:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i160> [[V]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i160> [[V]], splat (i160 32) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i160> [[TMP1]] to <2 x i128> ; CHECK-NEXT: [[RET_RSRC:%.*]] = inttoptr <2 x i128> [[TMP2]] to <2 x ptr addrspace(8)> ; CHECK-NEXT: [[RET_OFF:%.*]] = trunc <2 x i160> [[V]] to <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll index bd9234d864b3d8..05c727201bbf1d 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll @@ -63,7 +63,7 @@ define amdgpu_vs void @promote_store_aggr() #0 { ; CHECK-NEXT: [[FOO6_FCA_1_INSERT:%.*]] = insertvalue [2 x float] [[FOO6_FCA_0_INSERT]], float 2.000000e+00, 1 ; CHECK-NEXT: [[FOO7:%.*]] = getelementptr [[BLOCK2:%.*]], ptr addrspace(1) @block2, i32 0, i32 1 ; CHECK-NEXT: store [2 x float] [[FOO6_FCA_1_INSERT]], ptr addrspace(1) [[FOO7]], align 4 -; CHECK-NEXT: store <4 x float> , ptr addrspace(1) @pv, align 16 +; CHECK-NEXT: store <4 x float> splat (float 1.000000e+00), ptr addrspace(1) @pv, align 16 ; CHECK-NEXT: ret void ; %i = alloca i32, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll index f1e2737b370ef0..74651e10c7809f 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll @@ -23,7 +23,7 @@ entry: define amdgpu_kernel void @memset_all_5(i64 %val) { ; CHECK-LABEL: @memset_all_5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> , i64 [[VAL:%.*]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> splat (i64 361700864190383365), i64 [[VAL:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i32 1 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index bf21ed6898ff46..c8e3f97f61515f 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -441,7 +441,7 @@ define half @test_pow_fast_f16__y_13(half %x) { ; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) ; GCN: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x) ; GCN: %__log2 = tail call fast <2 x half> @llvm.log2.v2f16(<2 x half> %__fabs) -; GCN: %__ylogx = fmul fast <2 x half> %__log2, +; GCN: %__ylogx = fmul fast <2 x half> %__log2, splat (half 0xH4A80) ; GCN: %__exp2 = tail call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %__ylogx) ; GCN: %1 = tail call <2 x half> @llvm.copysign.v2f16(<2 x half> %__exp2, <2 x half> %x) define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) { diff --git a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll index 1e60261df13046..29996d68040e73 100644 --- a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll @@ -357,7 +357,7 @@ define void @broken_phi() { ; GFX906-NEXT: bb: ; GFX906-NEXT: br label [[BB1:%.*]] ; GFX906: bb1: -; GFX906-NEXT: [[I:%.*]] = phi <4 x i8> [ , [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] +; GFX906-NEXT: [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ] ; GFX906-NEXT: br i1 false, label [[BB3:%.*]], label [[BB2:%.*]] ; GFX906: bb2: ; GFX906-NEXT: br label [[BB3]] @@ -432,7 +432,7 @@ define amdgpu_kernel void @deletedPHI(i32 %in0, i1 %cmp, <10 x i8> %invec0) { ; GFX906-NEXT: br label [[BB_1:%.*]] ; GFX906: bb.1: ; GFX906-NEXT: [[PHI0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB_11:%.*]] ] -; GFX906-NEXT: [[PHI1:%.*]] = phi <10 x i8> [ , [[ENTRY]] ], [ [[VEC1:%.*]], [[BB_11]] ] +; GFX906-NEXT: [[PHI1:%.*]] = phi <10 x i8> [ splat (i8 1), [[ENTRY]] ], [ [[VEC1:%.*]], [[BB_11]] ] ; GFX906-NEXT: br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]] ; GFX906: bb.2: ; GFX906-NEXT: br label [[BB_3]] @@ -514,7 +514,7 @@ define amdgpu_kernel void @multiple_unwind(i1 %cmp, <10 x i8> %invec) { ; GFX906-NEXT: entry: ; GFX906-NEXT: br label [[BB_1:%.*]] ; GFX906: bb.1: -; GFX906-NEXT: [[PHI0:%.*]] = phi <10 x i8> [ , [[ENTRY:%.*]] ], [ [[PHI3:%.*]], [[BB_8:%.*]] ] +; GFX906-NEXT: [[PHI0:%.*]] = phi <10 x i8> [ splat (i8 1), [[ENTRY:%.*]] ], [ [[PHI3:%.*]], [[BB_8:%.*]] ] ; GFX906-NEXT: br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]] ; GFX906: bb.2: ; GFX906-NEXT: br label [[BB_3]] diff --git a/llvm/test/CodeGen/ARM/vector-promotion.ll b/llvm/test/CodeGen/ARM/vector-promotion.ll index f4a2a4a4521e99..a9a8f58963a374 100644 --- a/llvm/test/CodeGen/ARM/vector-promotion.ll +++ b/llvm/test/CodeGen/ARM/vector-promotion.ll @@ -127,7 +127,7 @@ define i32 @unsupportedMultiUses(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 [[EXTRACT]], 7 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = udiv <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = udiv <2 x i32> [[LOAD]], splat (i32 7) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest @@ -146,7 +146,7 @@ define void @udivCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = urem i32 [[EXTRACT]], 7 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = urem <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = urem <2 x i32> [[LOAD]], splat (i32 7) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest @@ -165,7 +165,7 @@ define void @uremCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sdiv i32 [[EXTRACT]], 7 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = sdiv <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = sdiv <2 x i32> [[LOAD]], splat (i32 7) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest @@ -184,7 +184,7 @@ define void @sdivCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 [[EXTRACT]], 7 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = srem <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = srem <2 x i32> [[LOAD]], splat (i32 7) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest @@ -203,7 +203,7 @@ define void @sremCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fdiv float [[EXTRACT]], 7.0 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fdiv <2 x float> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fdiv <2 x float> [[LOAD]], splat (float 7.000000e+00) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store float [[RES]], ptr %dest @@ -222,7 +222,7 @@ define void @fdivCase(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem float [[EXTRACT]], 7.0 ; Vector version: -; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem <2 x float> [[LOAD]], +; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem <2 x float> [[LOAD]], splat (float 7.000000e+00) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1 ; ; IR-BOTH-NEXT: store float [[RES]], ptr %dest @@ -342,7 +342,7 @@ define void @simpleOneInstructionPromotionFloat(ptr %addr1, ptr %dest) { ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 %idx ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1 ; Vector version: -; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], +; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], splat (i32 1) ; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[OR]], i32 %idx ; ; IR-BOTH-NEXT: store i32 [[RES]], ptr %dest diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll index 73279844e91b33..6eece708fa337d 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll @@ -135,18 +135,18 @@ define void @f3(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 { ; CHECK-NEXT: [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32> ; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]]) ; CHECK-NEXT: [[CST4:%.*]] = bitcast <32 x i32> [[CUP]] to <128 x i8> -; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> zeroinitializer, i32 [[PTI1]]) +; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> zeroinitializer, i32 [[PTI1]]) ; CHECK-NEXT: [[CST6:%.*]] = bitcast <32 x i32> [[CUP5]] to <128 x i8> ; CHECK-NEXT: [[CST7:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32> ; CHECK-NEXT: [[CST8:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32> ; CHECK-NEXT: [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST7]], <32 x i32> [[CST8]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST10:%.*]] = bitcast <32 x i32> [[CUP9]] to <128 x i8> -; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP11]] to <128 x i8> ; CHECK-NEXT: [[CST13:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32> ; CHECK-NEXT: [[CUP14:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[CST13]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST15:%.*]] = bitcast <32 x i32> [[CUP14]] to <128 x i8> -; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST17:%.*]] = bitcast <32 x i32> [[CUP16]] to <128 x i8> ; CHECK-NEXT: [[TRN:%.*]] = trunc <128 x i8> [[CST6]] to <128 x i1> ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST4]], ptr [[ITP]], i32 128, <128 x i1> [[TRN]]), !tbaa [[TBAA5:![0-9]+]] @@ -188,18 +188,18 @@ define void @f4(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 { ; CHECK-NEXT: [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32> ; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]]) ; CHECK-NEXT: [[CST4:%.*]] = bitcast <32 x i32> [[CUP]] to <128 x i8> -; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> zeroinitializer, i32 [[PTI1]]) +; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> zeroinitializer, i32 [[PTI1]]) ; CHECK-NEXT: [[CST6:%.*]] = bitcast <32 x i32> [[CUP5]] to <128 x i8> ; CHECK-NEXT: [[CST7:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32> ; CHECK-NEXT: [[CST8:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32> ; CHECK-NEXT: [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST7]], <32 x i32> [[CST8]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST10:%.*]] = bitcast <32 x i32> [[CUP9]] to <128 x i8> -; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP11]] to <128 x i8> ; CHECK-NEXT: [[CST13:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32> ; CHECK-NEXT: [[CUP14:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[CST13]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST15:%.*]] = bitcast <32 x i32> [[CUP14]] to <128 x i8> -; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST17:%.*]] = bitcast <32 x i32> [[CUP16]] to <128 x i8> ; CHECK-NEXT: [[TRN:%.*]] = trunc <128 x i8> [[CST6]] to <128 x i1> ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST4]], ptr [[ITP]], i32 128, <128 x i1> [[TRN]]) @@ -241,18 +241,18 @@ define void @f5(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 { ; CHECK-NEXT: [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32> ; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]]) ; CHECK-NEXT: [[CST4:%.*]] = bitcast <32 x i32> [[CUP]] to <128 x i8> -; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> zeroinitializer, i32 [[PTI1]]) +; CHECK-NEXT: [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> zeroinitializer, i32 [[PTI1]]) ; CHECK-NEXT: [[CST6:%.*]] = bitcast <32 x i32> [[CUP5]] to <128 x i8> ; CHECK-NEXT: [[CST7:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32> ; CHECK-NEXT: [[CST8:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32> ; CHECK-NEXT: [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST7]], <32 x i32> [[CST8]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST10:%.*]] = bitcast <32 x i32> [[CUP9]] to <128 x i8> -; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> , <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP11]] to <128 x i8> ; CHECK-NEXT: [[CST13:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32> ; CHECK-NEXT: [[CUP14:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[CST13]], i32 [[PTI1]]) ; CHECK-NEXT: [[CST15:%.*]] = bitcast <32 x i32> [[CUP14]] to <128 x i8> -; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> , i32 [[PTI1]]) +; CHECK-NEXT: [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> splat (i32 -1), i32 [[PTI1]]) ; CHECK-NEXT: [[CST17:%.*]] = bitcast <32 x i32> [[CUP16]] to <128 x i8> ; CHECK-NEXT: [[TRN:%.*]] = trunc <128 x i8> [[CST6]] to <128 x i1> ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST4]], ptr [[ITP]], i32 128, <128 x i1> [[TRN]]), !tbaa [[TBAA5]] diff --git a/llvm/test/CodeGen/NVPTX/variadics-lowering.ll b/llvm/test/CodeGen/NVPTX/variadics-lowering.ll index 5c8eb265843645..5502980a263b2e 100644 --- a/llvm/test/CodeGen/NVPTX/variadics-lowering.ll +++ b/llvm/test/CodeGen/NVPTX/variadics-lowering.ll @@ -276,7 +276,7 @@ define dso_local i32 @baz() { ; CHECK-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[BAZ_VARARG:%.*]], align 16 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]]) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[BAZ_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP0]], align 16 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP0]], align 16 ; CHECK-NEXT: [[CALL:%.*]] = call i32 @variadics3(i32 noundef 1, ptr [[VARARG_BUFFER]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]]) ; CHECK-NEXT: ret i32 [[CALL]] diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll index 32aec9586c29cb..58b2338650eaab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll @@ -12,16 +12,16 @@ define void @gather_bad_or(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], -; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], +; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5) +; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1) ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> , <32 x i8> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: @@ -58,15 +58,15 @@ define void @gather_narrow_index(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw <32 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw <32 x i32> [[VEC_IND]], splat (i32 5) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i32> [[TMP0]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[TMP1]], i32 1, <32 x i1> , <32 x i8> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[TMP1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: @@ -102,16 +102,16 @@ define void @gather_broken_stride(ptr noalias nocapture %A, ptr noalias nocaptur ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], -; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], +; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5) +; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1) ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> , <32 x i8> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll index d723c2f6df1afd..2cbbfc019ab4df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll @@ -16,8 +16,8 @@ define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -102,8 +102,8 @@ define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapt ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 155, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 -5, <32 x i1> , i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 -5, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -145,8 +145,8 @@ define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 0, <32 x i1> , i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 0, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -194,10 +194,10 @@ define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> , i32 32) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -282,8 +282,8 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -328,8 +328,8 @@ define void @gather_unknown_pow2(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 [[TMP0]], <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -376,13 +376,13 @@ define void @negative_shl_non_commute(ptr noalias nocapture %A, ptr noalias noca ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I:%.*]] = shl nsw <8 x i64> [[DOTSPLAT]], [[VEC_IND]] ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <8 x i64> [[I]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[I1]], i32 4, <8 x i1> , <8 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[I1]], i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: @@ -426,10 +426,10 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -477,10 +477,10 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 8, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[B]], i64 [[VEC_IND_SCALAR1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 4 ; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr [[I2]], i64 8 @@ -548,37 +548,37 @@ define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: [[VEC_IND_SCALAR11:%.*]] = phi i64 [ 12, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND_SCALAR13:%.*]] = phi i64 [ 3, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR14:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP2]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP2]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I3:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER52]], [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP2]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP2]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR3]] -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP4]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP5]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP4]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP5]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR5]] -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP6]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP7]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP6]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP7]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER54]], [[WIDE_MASKED_GATHER53]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP6]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP6]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR7]] -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP8]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP9]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP8]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP9]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR9]] -; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP10]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP11]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP10]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP11]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I13:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER56]], [[WIDE_MASKED_GATHER55]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP10]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP10]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR11]] -; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP12]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP13]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP12]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP13]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR13]] -; CHECK-NEXT: [[TMP15:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP14]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> , <8 x i32> [[TMP15]], <8 x i32> undef, i32 8) +; CHECK-NEXT: [[TMP15:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP14]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP15]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I18:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER58]], [[WIDE_MASKED_GATHER57]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP14]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP14]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 128 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR2]] = add i64 [[VEC_IND_SCALAR1]], 32 @@ -655,10 +655,10 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; V-NEXT: [[I3_SCALAR1:%.*]] = phi i64 [ 10, [[BB]] ], [ [[I16_SCALAR2:%.*]], [[BB2]] ] ; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG1:%.*]], i64 [[I3_SCALAR]] ; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[I3_SCALAR1]] -; V-NEXT: [[TMP2:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP0]], i64 40, <2 x i1> , i32 2) -; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> , <2 x ptr> [[TMP2]], <2 x ptr> undef, i32 2) -; V-NEXT: [[TMP3:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP1]], i64 40, <2 x i1> , i32 2) -; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> , <2 x ptr> [[TMP3]], <2 x ptr> undef, i32 2) +; V-NEXT: [[TMP2:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP0]], i64 40, <2 x i1> splat (i1 true), i32 2) +; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> splat (i1 true), <2 x ptr> [[TMP2]], <2 x ptr> undef, i32 2) +; V-NEXT: [[TMP3:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP1]], i64 40, <2 x i1> splat (i1 true), i32 2) +; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> splat (i1 true), <2 x ptr> [[TMP3]], <2 x ptr> undef, i32 2) ; V-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]] ; V-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8 ; V-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2 @@ -677,19 +677,19 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; ZVE32F: bb2: ; ZVE32F-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ] ; ZVE32F-NEXT: [[I3:%.*]] = phi <2 x i64> [ , [[BB]] ], [ [[I16:%.*]], [[BB2]] ] -; ZVE32F-NEXT: [[I4:%.*]] = mul nuw nsw <2 x i64> [[I3]], -; ZVE32F-NEXT: [[I5:%.*]] = mul <2 x i64> [[I3]], -; ZVE32F-NEXT: [[I6:%.*]] = add <2 x i64> [[I5]], +; ZVE32F-NEXT: [[I4:%.*]] = mul nuw nsw <2 x i64> [[I3]], splat (i64 5) +; ZVE32F-NEXT: [[I5:%.*]] = mul <2 x i64> [[I3]], splat (i64 5) +; ZVE32F-NEXT: [[I6:%.*]] = add <2 x i64> [[I5]], splat (i64 10) ; ZVE32F-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], <2 x i64> [[I4]] ; ZVE32F-NEXT: [[I8:%.*]] = getelementptr inbounds ptr, ptr [[ARG1]], <2 x i64> [[I6]] -; ZVE32F-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I7]], i32 8, <2 x i1> , <2 x ptr> undef) -; ZVE32F-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I8]], i32 8, <2 x i1> , <2 x ptr> undef) +; ZVE32F-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I7]], i32 8, <2 x i1> splat (i1 true), <2 x ptr> undef) +; ZVE32F-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I8]], i32 8, <2 x i1> splat (i1 true), <2 x ptr> undef) ; ZVE32F-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]] ; ZVE32F-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8 ; ZVE32F-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2 ; ZVE32F-NEXT: store <2 x ptr> [[I10]], ptr [[I13]], align 8 ; ZVE32F-NEXT: [[I15]] = add nuw i64 [[I]], 4 -; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], +; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], splat (i64 4) ; ZVE32F-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024 ; ZVE32F-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]] ; ZVE32F: bb18: @@ -738,8 +738,8 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; V-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8 ; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG:%.*]], i64 [[I3_SCALAR]] ; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG]], i64 [[I3_SCALAR1]] -; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> , i32 2) -; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> , i32 2) +; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> splat (i1 true), i32 2) +; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> splat (i1 true), i32 2) ; V-NEXT: [[I15]] = add nuw i64 [[I]], 4 ; V-NEXT: [[I16_SCALAR]] = add i64 [[I3_SCALAR]], 20 ; V-NEXT: [[I16_SCALAR2]] = add i64 [[I3_SCALAR1]], 20 @@ -758,15 +758,15 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; ZVE32F-NEXT: [[I6:%.*]] = load <2 x ptr>, ptr [[I4]], align 8 ; ZVE32F-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[I4]], i64 2 ; ZVE32F-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8 -; ZVE32F-NEXT: [[I10:%.*]] = mul nuw nsw <2 x i64> [[I3]], -; ZVE32F-NEXT: [[I11:%.*]] = mul <2 x i64> [[I3]], -; ZVE32F-NEXT: [[I12:%.*]] = add <2 x i64> [[I11]], +; ZVE32F-NEXT: [[I10:%.*]] = mul nuw nsw <2 x i64> [[I3]], splat (i64 5) +; ZVE32F-NEXT: [[I11:%.*]] = mul <2 x i64> [[I3]], splat (i64 5) +; ZVE32F-NEXT: [[I12:%.*]] = add <2 x i64> [[I11]], splat (i64 10) ; ZVE32F-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], <2 x i64> [[I10]] ; ZVE32F-NEXT: [[I14:%.*]] = getelementptr inbounds ptr, ptr [[ARG]], <2 x i64> [[I12]] -; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I6]], <2 x ptr> [[I13]], i32 8, <2 x i1> ) -; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I9]], <2 x ptr> [[I14]], i32 8, <2 x i1> ) +; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I6]], <2 x ptr> [[I13]], i32 8, <2 x i1> splat (i1 true)) +; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I9]], <2 x ptr> [[I14]], i32 8, <2 x i1> splat (i1 true)) ; ZVE32F-NEXT: [[I15]] = add nuw i64 [[I]], 4 -; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], +; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], splat (i64 4) ; ZVE32F-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024 ; ZVE32F-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]] ; ZVE32F: bb18: @@ -822,8 +822,8 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt ; CHECK-NEXT: [[I17_SCALAR:%.*]] = phi i64 [ [[START]], [[BB9]] ], [ [[I28_SCALAR:%.*]], [[BB15]] ] ; CHECK-NEXT: [[I18:%.*]] = add i64 [[I16]], [[I4]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I17_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) -; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) +; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I22:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I18]] ; CHECK-NEXT: [[I24:%.*]] = load <32 x i8>, ptr [[I22]], align 1 ; CHECK-NEXT: [[I25:%.*]] = add <32 x i8> [[I24]], [[I21]] @@ -931,8 +931,8 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr ; CHECK-NEXT: [[I5:%.*]] = phi i64 [ [[I13:%.*]], [[BB4]] ], [ 0, [[BB2]] ] ; CHECK-NEXT: [[I6_SCALAR:%.*]] = phi i64 [ 0, [[BB2]] ], [ [[I14_SCALAR:%.*]], [[BB4]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I6_SCALAR]] -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr [[TMP0]], i64 5, <16 x i1> , i32 16) -; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.vp.select.v16i8(<16 x i1> , <16 x i8> [[TMP1]], <16 x i8> undef, i32 16) +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr [[TMP0]], i64 5, <16 x i1> splat (i1 true), i32 16) +; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.vp.select.v16i8(<16 x i1> splat (i1 true), <16 x i8> [[TMP1]], <16 x i8> undef, i32 16) ; CHECK-NEXT: [[I10:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I5]] ; CHECK-NEXT: [[I11:%.*]] = load <16 x i8>, ptr [[I10]], align 1 ; CHECK-NEXT: [[I12:%.*]] = add <16 x i8> [[I11]], [[I9]] @@ -974,8 +974,8 @@ bb16: ; preds = %bb4, %bb define <8 x i8> @broadcast_ptr_base(ptr %a) { ; CHECK-LABEL: @broadcast_ptr_base( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr [[A:%.*]], i64 64, <8 x i1> , i32 8) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vp.select.v8i8(<8 x i1> , <8 x i8> [[TMP0]], <8 x i8> poison, i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr [[A:%.*]], i64 64, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vp.select.v8i8(<8 x i1> splat (i1 true), <8 x i8> [[TMP0]], <8 x i8> poison, i32 8) ; CHECK-NEXT: ret <8 x i8> [[TMP1]] ; entry: @@ -995,15 +995,15 @@ define void @gather_narrow_idx(ptr noalias nocapture %A, ptr noalias nocapture r ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i16> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i16> [[VEC_IND]], +; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i16> [[VEC_IND]], splat (i16 5) ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i16> [[I]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> , <32 x i8> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i16> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i16> [[VEC_IND]], splat (i16 32) ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] ; CHECK: for.cond.cleanup: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll index 6342e288647c2f..e70868d4ffeb03 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll @@ -119,7 +119,7 @@ define dso_local void @foo3(ptr noalias nocapture %A, ptr noalias nocapture read ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[INDUCTION]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[INDUCTION]], splat (i32 32002) ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) ; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] @@ -226,13 +226,33 @@ for.cond.cleanup: ret void } +; +define dso_local void @inconsistent_tripcounts(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @inconsistent_tripcounts( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 -1) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @inconsistent_tripcounts(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 8001) br label %vector.body @@ -261,13 +281,33 @@ for.cond.cleanup: ret void } +; +define dso_local void @overflow_in_sub(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @overflow_in_sub( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 1073741824) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 32003) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @overflow_in_sub(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 1073741824) br label %vector.body @@ -296,13 +336,33 @@ for.cond.cleanup: } +; +define dso_local void @IV_not_an_induction(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @IV_not_an_induction( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[N:%.*]], i32 32003) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @IV_not_an_induction(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 8001) br label %vector.body @@ -331,13 +391,33 @@ for.cond.cleanup: ret void } +; +define dso_local void @IV_wrong_step(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @IV_wrong_step( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 32003) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @IV_wrong_step(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 8001) br label %vector.body @@ -369,13 +449,33 @@ for.cond.cleanup: ret void } +; +define dso_local void @IV_step_not_constant(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @IV_step_not_constant( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 32003) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[N:%.*]] +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4 +; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4 +; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4 +; CHECK-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; -define dso_local void @IV_step_not_constant(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { entry: %start = call i32 @llvm.start.loop.iterations.i32(i32 8001) br label %vector.body @@ -406,13 +506,49 @@ for.cond.cleanup: ret void } +; +define dso_local void @outerloop_phi(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @outerloop_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP24:%.*]] = icmp eq i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_PH_PREHEADER:%.*]] +; CHECK: vector.ph.preheader: +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[LSR_IV36:%.*]] = phi ptr [ [[B:%.*]], [[VECTOR_PH_PREHEADER]] ], [ [[SCEVGEP37:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] +; CHECK-NEXT: [[LSR_IV31:%.*]] = phi ptr [ [[C:%.*]], [[VECTOR_PH_PREHEADER]] ], [ [[SCEVGEP32:%.*]], [[FOR_COND_CLEANUP3]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[A:%.*]], [[VECTOR_PH_PREHEADER]] ], [ [[SCEVGEP:%.*]], [[FOR_COND_CLEANUP3]] ] +; CHECK-NEXT: [[J_025:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3]] ], [ 0, [[VECTOR_PH_PREHEADER]] ] +; CHECK-NEXT: [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 1025) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NOT: @llvm.arm.mve.vctp32 -; CHECK: @llvm.get.active.lane.mask -; CHECK: ret void +; CHECK-NEXT: [[LSR_IV38:%.*]] = phi ptr [ [[SCEVGEP39:%.*]], [[VECTOR_BODY]] ], [ [[LSR_IV36]], [[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV33:%.*]] = phi ptr [ [[SCEVGEP34:%.*]], [[VECTOR_BODY]] ], [ [[LSR_IV31]], [[VECTOR_PH]] ] +; CHECK-NEXT: [[LSR_IV28:%.*]] = phi ptr [ [[SCEVGEP29:%.*]], [[VECTOR_BODY]] ], [ [[LSR_IV]], [[VECTOR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[J_025]], i32 4096) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV38]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV33]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD27]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP1]], ptr [[LSR_IV28]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[SCEVGEP29]] = getelementptr i32, ptr [[LSR_IV28]], i32 4 +; CHECK-NEXT: [[SCEVGEP34]] = getelementptr i32, ptr [[LSR_IV33]], i32 4 +; CHECK-NEXT: [[SCEVGEP39]] = getelementptr i32, ptr [[LSR_IV38]], i32 4 +; CHECK-NEXT: [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP3]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.cond.cleanup3: +; CHECK-NEXT: [[INC11]] = add nuw i32 [[J_025]], 1 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 1 +; CHECK-NEXT: [[SCEVGEP32]] = getelementptr i32, ptr [[LSR_IV31]], i32 1 +; CHECK-NEXT: [[SCEVGEP37]] = getelementptr i32, ptr [[LSR_IV36]], i32 1 +; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i32 [[INC11]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND26]], label [[FOR_COND_CLEANUP]], label [[VECTOR_PH]] ; -define dso_local void @outerloop_phi(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { entry: %cmp24 = icmp eq i32 %N, 0 br i1 %cmp24, label %for.cond.cleanup, label %vector.ph.preheader diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll index b91800c48cec4f..239372e1addb23 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll @@ -6,13 +6,13 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define arm_aapcs_vfpcc void @push_out_add_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_add_sub_block( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , -; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], +; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , splat (i32 6) +; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]] -; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], +; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], splat (i32 32) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] @@ -65,13 +65,13 @@ end: define arm_aapcs_vfpcc void @push_out_add_sub_block_commutedphi(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_add_sub_block_commutedphi( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , -; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], +; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , splat (i32 6) +; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]] -; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], +; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], splat (i32 32) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] @@ -124,15 +124,15 @@ end: define arm_aapcs_vfpcc void @push_out_mul_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_mul_sub_block( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> , -; CHECK-NEXT: [[PRODUCT:%.*]] = mul <4 x i32> , -; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> [[PUSHEDOUTMUL]], -; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], +; CHECK-NEXT: [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> , splat (i32 3) +; CHECK-NEXT: [[PRODUCT:%.*]] = mul <4 x i32> splat (i32 8), splat (i32 3) +; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> [[PUSHEDOUTMUL]], splat (i32 6) +; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]] -; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], +; CHECK-NEXT: [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], splat (i32 96) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] @@ -195,8 +195,8 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_loop(ptr noalias nocapture readonl ; CHECK: vector.2.ph: ; CHECK-NEXT: br label [[VECTOR_2_BODY:%.*]] ; CHECK: vector.2.body: -; CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3) +; CHECK-NEXT: [[SCALEDINDEX:%.*]] = shl <4 x i32> [[TMP0]], splat (i32 2) ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -211,7 +211,7 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_loop(ptr noalias nocapture readonl ; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY_END]], label [[VECTOR_2_BODY]] ; CHECK: vector.body.end: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] ; CHECK-NEXT: br i1 [[TMP5]], label [[END:%.*]], label [[VECTOR_BODY]] ; CHECK: end: @@ -261,13 +261,13 @@ define arm_aapcs_vfpcc void @invariant_add(ptr noalias nocapture readonly %data, ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[L0:%.*]] = mul <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[L0:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3) ; CHECK-NEXT: [[L1:%.*]] = add <4 x i32> [[L0]], [[VEC_IND]] ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr [[DATA:%.*]], <4 x i32> [[L1]], i32 32, i32 2, i32 1) ; CHECK-NEXT: [[L3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[L3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; CHECK-NEXT: [[L5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] ; CHECK-NEXT: br i1 [[L5]], label [[END:%.*]], label [[VECTOR_BODY]] ; CHECK: end: diff --git a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll index c18ae82aeb71fa..7846f82ad54cd0 100644 --- a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll +++ b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll @@ -523,7 +523,7 @@ declare void @dummy(i64, i64, i64) define void @vectorPromotion() { ; OPTALL-LABEL: define void @vectorPromotion() { ; OPTALL-NEXT: entry: -; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, +; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, splat (i32 8) ; OPTALL-NEXT: [[B:%.*]] = zext <2 x i32> [[A]] to <2 x i64> ; OPTALL-NEXT: ret void ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll index 4cc038f03ff2c3..8a1e3551b2741f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool opt --version 4 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; ; RUN: opt < %s -passes=msan -S | FileCheck %s ; @@ -14,7 +14,7 @@ define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: unreachable @@ -25,7 +25,7 @@ define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -58,7 +58,7 @@ define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -69,7 +69,7 @@ define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -102,7 +102,7 @@ define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -113,7 +113,7 @@ define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -146,7 +146,7 @@ define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -157,7 +157,7 @@ define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -189,7 +189,7 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -199,9 +199,9 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -217,7 +217,7 @@ define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -228,7 +228,7 @@ define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -258,7 +258,7 @@ define i64 @sqshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -286,7 +286,7 @@ define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -297,7 +297,7 @@ define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -330,7 +330,7 @@ define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -341,7 +341,7 @@ define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -374,7 +374,7 @@ define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -385,7 +385,7 @@ define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -418,7 +418,7 @@ define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -429,7 +429,7 @@ define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -463,7 +463,7 @@ define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -474,7 +474,7 @@ define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -508,7 +508,7 @@ define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -519,7 +519,7 @@ define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -553,7 +553,7 @@ define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -564,7 +564,7 @@ define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -598,7 +598,7 @@ define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -609,7 +609,7 @@ define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -643,7 +643,7 @@ define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -654,7 +654,7 @@ define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -688,7 +688,7 @@ define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -699,7 +699,7 @@ define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -733,7 +733,7 @@ define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -744,7 +744,7 @@ define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -778,7 +778,7 @@ define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -789,7 +789,7 @@ define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -821,7 +821,7 @@ define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -831,9 +831,9 @@ define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -849,7 +849,7 @@ define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -860,7 +860,7 @@ define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -890,7 +890,7 @@ define i64 @uqshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -941,7 +941,7 @@ define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -952,7 +952,7 @@ define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -985,7 +985,7 @@ define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -996,7 +996,7 @@ define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1029,7 +1029,7 @@ define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1040,7 +1040,7 @@ define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1073,7 +1073,7 @@ define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1084,7 +1084,7 @@ define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1116,7 +1116,7 @@ define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1126,9 +1126,9 @@ define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -1144,7 +1144,7 @@ define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1155,7 +1155,7 @@ define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1185,7 +1185,7 @@ define i64 @srshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1213,7 +1213,7 @@ define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1224,7 +1224,7 @@ define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1257,7 +1257,7 @@ define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1268,7 +1268,7 @@ define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1301,7 +1301,7 @@ define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1312,7 +1312,7 @@ define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1345,7 +1345,7 @@ define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1356,7 +1356,7 @@ define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1388,7 +1388,7 @@ define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1398,9 +1398,9 @@ define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -1416,7 +1416,7 @@ define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1427,7 +1427,7 @@ define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1457,7 +1457,7 @@ define i64 @urshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1485,7 +1485,7 @@ define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1496,7 +1496,7 @@ define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1530,7 +1530,7 @@ define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1541,7 +1541,7 @@ define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1575,7 +1575,7 @@ define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1586,7 +1586,7 @@ define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1620,7 +1620,7 @@ define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1631,7 +1631,7 @@ define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1665,7 +1665,7 @@ define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1676,7 +1676,7 @@ define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1710,7 +1710,7 @@ define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1721,7 +1721,7 @@ define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1755,7 +1755,7 @@ define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1766,7 +1766,7 @@ define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1800,7 +1800,7 @@ define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1811,7 +1811,7 @@ define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1867,7 +1867,7 @@ define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1878,7 +1878,7 @@ define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1911,7 +1911,7 @@ define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1922,7 +1922,7 @@ define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1955,7 +1955,7 @@ define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1966,7 +1966,7 @@ define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -1999,7 +1999,7 @@ define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2010,7 +2010,7 @@ define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2043,7 +2043,7 @@ define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2054,7 +2054,7 @@ define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2087,7 +2087,7 @@ define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2098,7 +2098,7 @@ define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2131,7 +2131,7 @@ define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2142,7 +2142,7 @@ define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2176,7 +2176,7 @@ define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2187,7 +2187,7 @@ define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2221,7 +2221,7 @@ define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2232,7 +2232,7 @@ define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2266,7 +2266,7 @@ define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2277,7 +2277,7 @@ define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2311,7 +2311,7 @@ define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2322,7 +2322,7 @@ define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2354,7 +2354,7 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2364,9 +2364,9 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -2382,7 +2382,7 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2393,7 +2393,7 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2423,7 +2423,7 @@ define i64 @sqrshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2451,7 +2451,7 @@ define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2462,7 +2462,7 @@ define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2496,7 +2496,7 @@ define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2507,7 +2507,7 @@ define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2541,7 +2541,7 @@ define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2552,7 +2552,7 @@ define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2586,7 +2586,7 @@ define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2597,7 +2597,7 @@ define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2631,7 +2631,7 @@ define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2642,7 +2642,7 @@ define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2674,7 +2674,7 @@ define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2684,9 +2684,9 @@ define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -2702,7 +2702,7 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2713,7 +2713,7 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2743,7 +2743,7 @@ define i64 @uqrshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2792,7 +2792,7 @@ define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2802,9 +2802,9 @@ define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -2819,7 +2819,7 @@ define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2829,9 +2829,9 @@ define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -2846,7 +2846,7 @@ define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2856,9 +2856,9 @@ define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -2873,7 +2873,7 @@ define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2883,9 +2883,9 @@ define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -2900,7 +2900,7 @@ define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2910,9 +2910,9 @@ define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -2927,7 +2927,7 @@ define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2937,9 +2937,9 @@ define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -2954,7 +2954,7 @@ define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2964,9 +2964,9 @@ define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -2981,7 +2981,7 @@ define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -2991,9 +2991,9 @@ define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -3008,7 +3008,7 @@ define i64 @urshr_scalar(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3035,7 +3035,7 @@ define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3045,9 +3045,9 @@ define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -3062,7 +3062,7 @@ define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3072,9 +3072,9 @@ define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -3089,7 +3089,7 @@ define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3099,9 +3099,9 @@ define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -3116,7 +3116,7 @@ define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3126,9 +3126,9 @@ define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -3143,7 +3143,7 @@ define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3153,9 +3153,9 @@ define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -3170,7 +3170,7 @@ define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3180,9 +3180,9 @@ define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -3197,7 +3197,7 @@ define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3207,9 +3207,9 @@ define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -3224,7 +3224,7 @@ define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3234,9 +3234,9 @@ define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -3251,7 +3251,7 @@ define i64 @srshr_scalar(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3278,7 +3278,7 @@ define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3288,9 +3288,9 @@ define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -3305,7 +3305,7 @@ define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3315,9 +3315,9 @@ define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -3332,7 +3332,7 @@ define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3342,9 +3342,9 @@ define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -3359,7 +3359,7 @@ define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3369,9 +3369,9 @@ define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -3386,7 +3386,7 @@ define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3396,9 +3396,9 @@ define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -3413,7 +3413,7 @@ define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3423,9 +3423,9 @@ define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -3440,7 +3440,7 @@ define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3450,9 +3450,9 @@ define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -3467,7 +3467,7 @@ define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3477,9 +3477,9 @@ define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -3494,7 +3494,7 @@ define i64 @sqshlu_i64_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3521,7 +3521,7 @@ define i32 @sqshlu_i32_constant(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3560,7 +3560,7 @@ define <8 x i8> @rshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3587,7 +3587,7 @@ define <4 x i16> @rshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3614,7 +3614,7 @@ define <2 x i32> @rshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3642,7 +3642,7 @@ define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3653,7 +3653,7 @@ define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3685,7 +3685,7 @@ define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3696,7 +3696,7 @@ define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3728,7 +3728,7 @@ define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3739,7 +3739,7 @@ define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3774,7 +3774,7 @@ define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3784,9 +3784,9 @@ define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP8]] to <8 x i8> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 @@ -3804,7 +3804,7 @@ define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3814,9 +3814,9 @@ define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 @@ -3834,7 +3834,7 @@ define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3844,9 +3844,9 @@ define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 @@ -3865,7 +3865,7 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3876,7 +3876,7 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3886,9 +3886,9 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP14]] to <8 x i8> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> ; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSPROP]], <16 x i32> @@ -3911,7 +3911,7 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3922,7 +3922,7 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3932,9 +3932,9 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSPROP]], <8 x i32> @@ -3957,7 +3957,7 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3968,7 +3968,7 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -3978,9 +3978,9 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], splat (i64 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP14]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> ; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSPROP]], <4 x i32> @@ -4021,7 +4021,7 @@ define <8 x i8> @sqshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4048,7 +4048,7 @@ define <4 x i16> @sqshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4075,7 +4075,7 @@ define <2 x i32> @sqshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4104,7 +4104,7 @@ define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4115,7 +4115,7 @@ define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4147,7 +4147,7 @@ define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4158,7 +4158,7 @@ define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4190,7 +4190,7 @@ define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4201,7 +4201,7 @@ define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4252,7 +4252,7 @@ define <8 x i8> @sqshrun8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4279,7 +4279,7 @@ define <4 x i16> @sqshrun4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4306,7 +4306,7 @@ define <2 x i32> @sqshrun2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4334,7 +4334,7 @@ define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4345,7 +4345,7 @@ define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4377,7 +4377,7 @@ define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4388,7 +4388,7 @@ define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4420,7 +4420,7 @@ define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4431,7 +4431,7 @@ define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4482,7 +4482,7 @@ define <8 x i8> @sqrshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4509,7 +4509,7 @@ define <4 x i16> @sqrshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4536,7 +4536,7 @@ define <2 x i32> @sqrshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4564,7 +4564,7 @@ define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4575,7 +4575,7 @@ define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4607,7 +4607,7 @@ define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4618,7 +4618,7 @@ define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4650,7 +4650,7 @@ define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4661,7 +4661,7 @@ define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4712,7 +4712,7 @@ define <8 x i8> @sqrshrun8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4739,7 +4739,7 @@ define <4 x i16> @sqrshrun4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4766,7 +4766,7 @@ define <2 x i32> @sqrshrun2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4794,7 +4794,7 @@ define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4805,7 +4805,7 @@ define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4837,7 +4837,7 @@ define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4848,7 +4848,7 @@ define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4880,7 +4880,7 @@ define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4891,7 +4891,7 @@ define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4942,7 +4942,7 @@ define <8 x i8> @uqrshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4969,7 +4969,7 @@ define <4 x i16> @uqrshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -4996,7 +4996,7 @@ define <2 x i32> @uqrshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5024,7 +5024,7 @@ define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5035,7 +5035,7 @@ define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5067,7 +5067,7 @@ define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5078,7 +5078,7 @@ define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5110,7 +5110,7 @@ define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5121,7 +5121,7 @@ define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5172,7 +5172,7 @@ define <8 x i8> @uqshrn8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5199,7 +5199,7 @@ define <4 x i16> @uqshrn4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5226,7 +5226,7 @@ define <2 x i32> @uqshrn2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5254,7 +5254,7 @@ define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5265,7 +5265,7 @@ define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5297,7 +5297,7 @@ define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5308,7 +5308,7 @@ define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5340,7 +5340,7 @@ define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5351,7 +5351,7 @@ define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5387,7 +5387,7 @@ define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5399,9 +5399,9 @@ define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1) ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5417,7 +5417,7 @@ define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5429,9 +5429,9 @@ define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5447,7 +5447,7 @@ define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5459,9 +5459,9 @@ define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -5477,7 +5477,7 @@ define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5487,13 +5487,13 @@ define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> , <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1), <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1) ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5510,7 +5510,7 @@ define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5520,13 +5520,13 @@ define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> , <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5543,7 +5543,7 @@ define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5553,13 +5553,13 @@ define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> , <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <2 x i32> [[_MSPROP]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -5583,7 +5583,7 @@ define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5595,9 +5595,9 @@ define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5613,7 +5613,7 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind sanitize_memory ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5648,7 +5648,7 @@ define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sani ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5660,9 +5660,9 @@ define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sani ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i8> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5678,7 +5678,7 @@ define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5688,9 +5688,9 @@ define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5705,7 +5705,7 @@ define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5717,9 +5717,9 @@ define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5736,7 +5736,7 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5748,9 +5748,9 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5765,9 +5765,9 @@ define <4 x i32> @neon.ushll4s_constant_fold() nounwind sanitize_memory { ; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_fold( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> zeroinitializer, <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> , <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -5781,7 +5781,7 @@ define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5793,9 +5793,9 @@ define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -5811,7 +5811,7 @@ define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_mem ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5823,9 +5823,9 @@ define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_mem ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -5841,7 +5841,7 @@ define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5871,7 +5871,7 @@ define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5883,9 +5883,9 @@ define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1) ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -5901,7 +5901,7 @@ define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5913,9 +5913,9 @@ define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -5938,7 +5938,7 @@ define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5948,9 +5948,9 @@ define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP2]] ; @@ -5965,7 +5965,7 @@ define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind sanitiz ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -5992,7 +5992,7 @@ define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6002,9 +6002,9 @@ define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -2)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -2)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP2]] ; @@ -6019,7 +6019,7 @@ define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6031,9 +6031,9 @@ define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -6049,7 +6049,7 @@ define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6061,9 +6061,9 @@ define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i8> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6079,7 +6079,7 @@ define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6091,9 +6091,9 @@ define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6109,7 +6109,7 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6121,9 +6121,9 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6138,9 +6138,9 @@ define <4 x i32> @neon.sshl4s_constant_fold() nounwind sanitize_memory { ; CHECK-LABEL: define <4 x i32> @neon.sshl4s_constant_fold( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> zeroinitializer, <4 x i32> splat (i32 2)) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> splat (i32 2)) ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6154,7 +6154,7 @@ define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6164,9 +6164,9 @@ define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6181,7 +6181,7 @@ define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6193,9 +6193,9 @@ define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6211,7 +6211,7 @@ define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_me ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6223,9 +6223,9 @@ define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_me ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 1)) ; CHECK-NEXT: store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; @@ -6241,7 +6241,7 @@ define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6271,7 +6271,7 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind sanitize_memory ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6300,9 +6300,9 @@ define <2 x i64> @neon.sshl2d_constant_fold() nounwind sanitize_memory { ; CHECK-LABEL: define <2 x i64> @neon.sshl2d_constant_fold( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> zeroinitializer, <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> zeroinitializer, <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> , <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> , <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6316,7 +6316,7 @@ define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6326,9 +6326,9 @@ define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 2)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 2)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6343,7 +6343,7 @@ define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6353,13 +6353,13 @@ define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> , <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1), <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], splat (i16 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1) ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -6376,7 +6376,7 @@ define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6386,13 +6386,13 @@ define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> , <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <4 x i16> [[_MSPROP]] to <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6409,7 +6409,7 @@ define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6419,13 +6419,13 @@ define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> , <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <2 x i32> [[_MSPROP]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], splat (i64 1) ; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6442,7 +6442,7 @@ define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6452,9 +6452,9 @@ define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -6469,7 +6469,7 @@ define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6479,9 +6479,9 @@ define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -6496,7 +6496,7 @@ define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6506,9 +6506,9 @@ define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -6523,7 +6523,7 @@ define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6533,9 +6533,9 @@ define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -6550,7 +6550,7 @@ define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6560,9 +6560,9 @@ define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -6577,7 +6577,7 @@ define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6587,9 +6587,9 @@ define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6604,7 +6604,7 @@ define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6614,9 +6614,9 @@ define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6631,7 +6631,7 @@ define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6641,9 +6641,9 @@ define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -6658,7 +6658,7 @@ define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6668,9 +6668,9 @@ define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 8)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 8)) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] ; @@ -6685,7 +6685,7 @@ define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6695,9 +6695,9 @@ define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1)) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; @@ -6712,7 +6712,7 @@ define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6722,9 +6722,9 @@ define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; @@ -6739,7 +6739,7 @@ define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6749,9 +6749,9 @@ define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1)) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -6766,7 +6766,7 @@ define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6776,9 +6776,9 @@ define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -6793,7 +6793,7 @@ define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6803,9 +6803,9 @@ define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1)) ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; @@ -6820,7 +6820,7 @@ define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6830,9 +6830,9 @@ define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -6848,7 +6848,7 @@ define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6858,11 +6858,11 @@ define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6891,7 +6891,7 @@ define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6901,11 +6901,11 @@ define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6934,7 +6934,7 @@ define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6944,11 +6944,11 @@ define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6977,7 +6977,7 @@ define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -6987,11 +6987,11 @@ define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7020,7 +7020,7 @@ define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7030,11 +7030,11 @@ define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7063,7 +7063,7 @@ define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7073,11 +7073,11 @@ define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7106,7 +7106,7 @@ define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7116,11 +7116,11 @@ define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7149,7 +7149,7 @@ define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7159,11 +7159,11 @@ define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7192,7 +7192,7 @@ define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7206,7 +7206,7 @@ define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP8]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7235,7 +7235,7 @@ define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7245,11 +7245,11 @@ define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7278,7 +7278,7 @@ define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7288,11 +7288,11 @@ define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7321,7 +7321,7 @@ define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7331,11 +7331,11 @@ define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7364,7 +7364,7 @@ define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7374,11 +7374,11 @@ define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7407,7 +7407,7 @@ define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7417,11 +7417,11 @@ define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7450,7 +7450,7 @@ define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7460,11 +7460,11 @@ define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7493,7 +7493,7 @@ define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7503,11 +7503,11 @@ define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7536,7 +7536,7 @@ define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7546,11 +7546,11 @@ define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1)) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7579,7 +7579,7 @@ define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7593,7 +7593,7 @@ define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP8]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7622,7 +7622,7 @@ define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7632,11 +7632,11 @@ define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7665,7 +7665,7 @@ define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7675,11 +7675,11 @@ define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7708,7 +7708,7 @@ define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7718,11 +7718,11 @@ define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7751,7 +7751,7 @@ define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7761,11 +7761,11 @@ define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7794,7 +7794,7 @@ define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7804,11 +7804,11 @@ define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7837,7 +7837,7 @@ define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7847,11 +7847,11 @@ define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7880,7 +7880,7 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7890,11 +7890,11 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7923,7 +7923,7 @@ define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7933,11 +7933,11 @@ define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7966,7 +7966,7 @@ define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -7976,11 +7976,11 @@ define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8009,7 +8009,7 @@ define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8019,11 +8019,11 @@ define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8052,7 +8052,7 @@ define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8062,11 +8062,11 @@ define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8095,7 +8095,7 @@ define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8105,11 +8105,11 @@ define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8138,7 +8138,7 @@ define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8148,11 +8148,11 @@ define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8181,7 +8181,7 @@ define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8191,11 +8191,11 @@ define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8224,7 +8224,7 @@ define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8234,11 +8234,11 @@ define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1) ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8267,7 +8267,7 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8278,7 +8278,7 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8288,11 +8288,11 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], splat (i8 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], splat (i8 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], splat (i8 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]] @@ -8316,7 +8316,7 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8327,7 +8327,7 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8337,11 +8337,11 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], splat (i16 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], splat (i16 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]] @@ -8365,7 +8365,7 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8376,7 +8376,7 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8386,11 +8386,11 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]] @@ -8414,7 +8414,7 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8425,7 +8425,7 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8435,11 +8435,11 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], splat (i8 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], splat (i8 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], splat (i8 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]] @@ -8463,7 +8463,7 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8474,7 +8474,7 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8484,11 +8484,11 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]] @@ -8512,7 +8512,7 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8523,7 +8523,7 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8533,11 +8533,11 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]] @@ -8561,7 +8561,7 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8572,7 +8572,7 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8582,11 +8582,11 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], splat (i64 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]] @@ -8610,7 +8610,7 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8621,7 +8621,7 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8631,11 +8631,11 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], splat (i8 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], splat (i8 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], splat (i8 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]] @@ -8659,7 +8659,7 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8670,7 +8670,7 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8680,11 +8680,11 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], splat (i16 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], splat (i16 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]] @@ -8708,7 +8708,7 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8719,7 +8719,7 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8729,11 +8729,11 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]] @@ -8757,7 +8757,7 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8768,7 +8768,7 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8778,11 +8778,11 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], splat (i8 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], splat (i8 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], splat (i8 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]] @@ -8806,7 +8806,7 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8817,7 +8817,7 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8827,11 +8827,11 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], splat (i16 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]] @@ -8855,7 +8855,7 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8866,7 +8866,7 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8876,11 +8876,11 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]] @@ -8904,7 +8904,7 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8915,7 +8915,7 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -8925,11 +8925,11 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], splat (i64 1) ; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1) +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -1) +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], splat (i64 -1) ; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]] ; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]] ; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]] @@ -8953,9 +8953,9 @@ define <8 x i16> @shll(<8 x i8> %in) sanitize_memory { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> ; CHECK-NEXT: [[EXT:%.*]] = zext <8 x i8> [[IN]] to <8 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 8) ; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = shl <8 x i16> [[EXT]], +; CHECK-NEXT: [[RES:%.*]] = shl <8 x i16> [[EXT]], splat (i16 8) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[RES]] ; @@ -8969,13 +8969,13 @@ define <4 x i32> @shll_high(<8 x i16> %in) sanitize_memory { ; CHECK-SAME: <8 x i16> [[IN:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> , <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <4 x i32> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i16> [[IN]], <8 x i16> undef, <4 x i32> ; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32> ; CHECK-NEXT: [[EXT:%.*]] = zext <4 x i16> [[EXTRACT]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 16) ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> [[EXT]], +; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> [[EXT]], splat (i32 16) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; @@ -8992,7 +8992,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9003,7 +9003,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9018,7 +9018,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9040,7 +9040,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9051,7 +9051,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9066,7 +9066,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9088,7 +9088,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9099,7 +9099,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9114,7 +9114,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9136,7 +9136,7 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9147,7 +9147,7 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9162,7 +9162,7 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9184,7 +9184,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9195,7 +9195,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9210,7 +9210,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9232,7 +9232,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9243,7 +9243,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9258,7 +9258,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9280,7 +9280,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9291,7 +9291,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9306,7 +9306,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9328,7 +9328,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9339,7 +9339,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9354,7 +9354,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] ; CHECK: 15: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9411,7 +9411,7 @@ define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9444,7 +9444,7 @@ define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9477,7 +9477,7 @@ define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9510,7 +9510,7 @@ define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9543,7 +9543,7 @@ define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sani ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9576,7 +9576,7 @@ define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) saniti ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9609,7 +9609,7 @@ define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) saniti ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -9675,7 +9675,7 @@ define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) sanitize_memory { ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[A]], +; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[A]], splat (i32 3) ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[_MSPROP]], i32 13) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[B]], i32 13) @@ -9690,5 +9690,5 @@ entry: declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) ;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} ;. diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll index 1453e64abd5a22..b0c71dc8b0851e 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll @@ -214,7 +214,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 @@ -353,7 +353,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 @@ -429,7 +429,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> , i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 ; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll index 421f00fcbc56bf..deeb1d4b6ff859 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll @@ -24,7 +24,7 @@ define void @st2_8b(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_memory { ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] ; CHECK-NEXT: unreachable @@ -47,9 +47,9 @@ define void @st2_8b_undefA(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_m ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -72,9 +72,9 @@ define void @st2_8b_undefB(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_m ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -96,9 +96,9 @@ define void @st2_8b_undefAB(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_ ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> , <8 x i8> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -125,7 +125,7 @@ define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwind sani ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -149,9 +149,9 @@ define void @st3_8b_undefA(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwi ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -175,9 +175,9 @@ define void @st3_8b_undefB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwi ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -201,9 +201,9 @@ define void @st3_8b_undefC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwi ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> splat (i8 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -226,9 +226,9 @@ define void @st3_8b_undefAB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounw ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -251,9 +251,9 @@ define void @st3_8b_undefAC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounw ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -276,9 +276,9 @@ define void @st3_8b_undefBC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounw ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -300,9 +300,9 @@ define void @st3_8b_undefABC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) noun ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -330,7 +330,7 @@ define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %P) ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -355,9 +355,9 @@ define void @st4_8b_undefA(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -382,9 +382,9 @@ define void @st4_8b_undefB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -409,9 +409,9 @@ define void @st4_8b_undefC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> , <8 x i8> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -436,9 +436,9 @@ define void @st4_8b_undefD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> , ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> splat (i8 -1), ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -462,9 +462,9 @@ define void @st4_8b_undefAB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -488,9 +488,9 @@ define void @st4_8b_undefAC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> , <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -514,9 +514,9 @@ define void @st4_8b_undefBC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> , <8 x i8> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -540,9 +540,9 @@ define void @st4_8b_undefBD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> [[TMP3]], <8 x i8> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], <8 x i8> splat (i8 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -565,9 +565,9 @@ define void @st4_8b_undefABC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> , <8 x i8> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -590,9 +590,9 @@ define void @st4_8b_undefABD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> [[TMP2]], <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -615,9 +615,9 @@ define void @st4_8b_undefACD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> [[TMP2]], <8 x i8> , <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -640,9 +640,9 @@ define void @st4_8b_undefBCD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> , <8 x i8> , <8 x i8> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -664,9 +664,9 @@ define void @st4_8b_undefABCD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> , <8 x i8> , <8 x i8> , <8 x i8> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -698,7 +698,7 @@ define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) nounwind sanitize_memor ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -725,7 +725,7 @@ define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %P) nounwind ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -753,7 +753,7 @@ define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -785,7 +785,7 @@ define void @st2_4h(<4 x i16> %A, <4 x i16> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -812,7 +812,7 @@ define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i16> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -840,7 +840,7 @@ define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -872,7 +872,7 @@ define void @st2_8h(<8 x i16> %A, <8 x i16> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -899,7 +899,7 @@ define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -927,7 +927,7 @@ define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -959,7 +959,7 @@ define void @st2_2s(<2 x i32> %A, <2 x i32> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -986,7 +986,7 @@ define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1014,7 +1014,7 @@ define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1044,7 +1044,7 @@ define void @st2_4s(<4 x i32> %A, <4 x i32> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1071,7 +1071,7 @@ define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1099,7 +1099,7 @@ define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1132,7 +1132,7 @@ define void @st2_1d(<1 x i64> %A, <1 x i64> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1159,7 +1159,7 @@ define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1187,7 +1187,7 @@ define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1219,7 +1219,7 @@ define void @st2_2d(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitize_memory ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1242,9 +1242,9 @@ define void @st2_2d_undefA(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitize ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1267,9 +1267,9 @@ define void @st2_2d_undefB(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitize ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1291,9 +1291,9 @@ define void @st2_2d_undefAB(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitiz ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> , <2 x i64> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1320,7 +1320,7 @@ define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nounwind s ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1344,9 +1344,9 @@ define void @st3_2d_undefA(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nou ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1370,9 +1370,9 @@ define void @st3_2d_undefB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nou ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1396,9 +1396,9 @@ define void @st3_2d_undefC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nou ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1421,9 +1421,9 @@ define void @st3_2d_undefAB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) no ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1446,9 +1446,9 @@ define void @st3_2d_undefAC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) no ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1471,9 +1471,9 @@ define void @st3_2d_undefBC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) no ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1495,9 +1495,9 @@ define void @st3_2d_undefABC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) n ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1525,7 +1525,7 @@ define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr [[TMP8]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1554,9 +1554,9 @@ define void @st4_2d_undefA(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> % ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1581,9 +1581,9 @@ define void @st4_2d_undefB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> % ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1608,9 +1608,9 @@ define void @st4_2d_undefC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> % ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> , <2 x i64> [[TMP4]], ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP4]], ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1635,9 +1635,9 @@ define void @st4_2d_undefD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> % ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> , ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), ptr [[TMP7]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1661,9 +1661,9 @@ define void @st4_2d_undefAB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1687,9 +1687,9 @@ define void @st4_2d_undefAC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> , <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1713,9 +1713,9 @@ define void @st4_2d_undefAD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1739,9 +1739,9 @@ define void @st4_2d_undefBC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> , <2 x i64> [[TMP3]], ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1765,9 +1765,9 @@ define void @st4_2d_undefBD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> [[TMP3]], <2 x i64> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1791,9 +1791,9 @@ define void @st4_2d_undefCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> , <2 x i64> , ptr [[TMP6]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP6]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 7: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1816,9 +1816,9 @@ define void @st4_2d_undefABC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> , <2 x i64> [[TMP2]], ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1841,9 +1841,9 @@ define void @st4_2d_undefABD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> [[TMP2]], <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1866,9 +1866,9 @@ define void @st4_2d_undefACD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> [[TMP2]], <2 x i64> , <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1891,9 +1891,9 @@ define void @st4_2d_undefBCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> , <2 x i64> , <2 x i64> , ptr [[TMP5]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP5]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1915,9 +1915,9 @@ define void @st4_2d_undefABCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> , <2 x i64> , <2 x i64> , <2 x i64> , ptr [[TMP4]]) +; CHECK-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP4]]) ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1929,5 +1929,5 @@ define void @st4_2d_undefABCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64 ret void } ;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} ;. diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll index 3d5e4005de5b10..5bb2c3f0e233b9 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll @@ -43,9 +43,9 @@ define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> % ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[A2:%.*]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], splat (i64 63) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[A1:%.*]] to <4 x i64> @@ -71,9 +71,9 @@ define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[A2:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], splat (i32 31) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[A1:%.*]] to <8 x i32> @@ -305,7 +305,7 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -326,7 +326,7 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -347,7 +347,7 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -368,7 +368,7 @@ define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -389,7 +389,7 @@ define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -503,7 +503,7 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(ptr %a0) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -527,7 +527,7 @@ define <2 x double> @test_x86_avx_maskload_pd(ptr %a0, <2 x i64> %mask) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -551,7 +551,7 @@ define <4 x double> @test_x86_avx_maskload_pd_256(ptr %a0, <4 x i64> %mask) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -575,7 +575,7 @@ define <4 x float> @test_x86_avx_maskload_ps(ptr %a0, <4 x i32> %mask) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -599,7 +599,7 @@ define <8 x float> @test_x86_avx_maskload_ps_256(ptr %a0, <8 x i32> %mask) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -627,7 +627,7 @@ define void @test_x86_avx_maskstore_pd(ptr %a0, <2 x i64> %mask, <2 x double> %a ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -654,7 +654,7 @@ define void @test_x86_avx_maskstore_pd_256(ptr %a0, <4 x i64> %mask, <4 x double ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -681,7 +681,7 @@ define void @test_x86_avx_maskstore_ps(ptr %a0, <4 x i32> %mask, <4 x float> %a2 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -708,7 +708,7 @@ define void @test_x86_avx_maskstore_ps_256(ptr %a0, <8 x i32> %mask, <8 x float> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -792,7 +792,7 @@ define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -813,7 +813,7 @@ define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -905,7 +905,7 @@ define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -926,7 +926,7 @@ define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -964,7 +964,7 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -989,7 +989,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1009,7 +1009,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1032,7 +1032,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) # ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1050,7 +1050,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1065,7 +1065,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1091,7 +1091,7 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1363,11 +1363,11 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], -; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> , <4 x i32> +; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], splat (i64 1) +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> splat (i64 -1), <4 x i32> ; CHECK-NEXT: [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1376,7 +1376,7 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr [[TMP7]], align 32 -; CHECK-NEXT: store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal [[META1:![0-9]+]] +; CHECK-NEXT: store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal [[META2:![0-9]+]] ; CHECK-NEXT: ret void ; %a2 = add <2 x i64> %a1, @@ -1392,7 +1392,7 @@ define void @movnt_ps(ptr %p, <8 x float> %a) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1401,7 +1401,7 @@ define void @movnt_ps(ptr %p, <8 x float> %a) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP7]], align 32 -; CHECK-NEXT: store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal [[META1]] +; CHECK-NEXT: store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal [[META2]] ; CHECK-NEXT: ret void ; tail call void @llvm.x86.avx.movnt.ps.256(ptr %p, <8 x float> %a) nounwind @@ -1418,7 +1418,7 @@ define void @movnt_pd(ptr %p, <4 x double> %a1) nounwind #0 { ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[A2:%.*]] = fadd <4 x double> [[A1:%.*]], zeroinitializer ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -1427,7 +1427,7 @@ define void @movnt_pd(ptr %p, <4 x double> %a1) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr [[TMP7]], align 32 -; CHECK-NEXT: store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal [[META1]] +; CHECK-NEXT: store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal [[META2]] ; CHECK-NEXT: ret void ; %a2 = fadd <4 x double> %a1, diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll index 5efb7eb4078987..1602e85d8516d2 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll @@ -160,7 +160,7 @@ define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i8> [[TMP1]] to i256 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: unreachable @@ -216,7 +216,7 @@ define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <4 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[RES]] @@ -489,7 +489,7 @@ define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, ptr %p) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -684,7 +684,7 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -797,7 +797,7 @@ define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -817,7 +817,7 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -832,7 +832,7 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -885,9 +885,9 @@ define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], splat (i8 7) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i8> [[TMP4]] to <32 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], splat (i8 7) ; CHECK-NEXT: [[TMP7:%.*]] = trunc <32 x i8> [[TMP6]] to <32 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i8> [[A1:%.*]], [[A0:%.*]] @@ -978,7 +978,7 @@ define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1002,7 +1002,7 @@ define <2 x i64> @test_x86_avx2_maskload_q(ptr %a0, <2 x i64> %a1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1026,7 +1026,7 @@ define <4 x i64> @test_x86_avx2_maskload_q_256(ptr %a0, <4 x i64> %a1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1050,7 +1050,7 @@ define <4 x i32> @test_x86_avx2_maskload_d(ptr %a0, <4 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1074,7 +1074,7 @@ define <8 x i32> @test_x86_avx2_maskload_d_256(ptr %a0, <8 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1102,7 +1102,7 @@ define void @test_x86_avx2_maskstore_q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2) #0 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1129,7 +1129,7 @@ define void @test_x86_avx2_maskstore_q_256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1156,7 +1156,7 @@ define void @test_x86_avx2_maskstore_d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2) #0 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1183,7 +1183,7 @@ define void @test_x86_avx2_maskstore_d_256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1438,7 +1438,7 @@ define <2 x i64> @test_x86_avx2_psrlv_q_const() #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> zeroinitializer, <2 x i64> ) ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> ) +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> splat (i64 4), <2 x i64> ) ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -1471,7 +1471,7 @@ define <4 x i64> @test_x86_avx2_psrlv_q_256_const() #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> zeroinitializer, <4 x i64> ) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> ) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> splat (i64 4), <4 x i64> ) ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[RES]] ; @@ -1560,7 +1560,7 @@ define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, ptr %a1, <4 x i ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1593,7 +1593,7 @@ define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, ptr %a1, <4 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1626,7 +1626,7 @@ define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, ptr %a1, <2 x i ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1659,7 +1659,7 @@ define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, ptr %a1, <4 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1692,7 +1692,7 @@ define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, ptr %a1, <4 x i32 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1725,7 +1725,7 @@ define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, ptr %a1, <8 x ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1758,7 +1758,7 @@ define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, ptr %a1, <2 x i64 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1791,7 +1791,7 @@ define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, ptr %a1, <4 x ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1824,7 +1824,7 @@ define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, ptr %a1, <4 x i32> %id ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1857,7 +1857,7 @@ define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, ptr %a1, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1890,7 +1890,7 @@ define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, ptr %a1, <2 x i64> %id ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1923,7 +1923,7 @@ define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, ptr %a1, <4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1956,7 +1956,7 @@ define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, ptr %a1, <4 x i32> %id ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -1989,7 +1989,7 @@ define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, ptr %a1, <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -2022,7 +2022,7 @@ define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, ptr %a1, <2 x i64> %id ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -2055,7 +2055,7 @@ define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, ptr %a1, <4 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -2089,14 +2089,14 @@ define <8 x float> @test_gather_mask(<8 x float> %a0, ptr %a, <8 x i32> %idx, < ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; CHECK: 9: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable ; CHECK: 10: ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], ptr [[A:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 4) ; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0 -; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] ; CHECK: 11: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -2136,7 +2136,7 @@ define <2 x i64> @test_mask_demanded_bits(<2 x i64> %a0, ptr %a1, <2 x i64> %idx ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSPROP]] to i128 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] -; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll index e9323f6dd33083..9d075f7974cd91 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll @@ -162,9 +162,9 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) @@ -183,9 +183,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) @@ -206,9 +206,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 3: ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 @@ -218,9 +218,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 9: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]]) @@ -243,9 +243,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) @@ -263,9 +263,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) @@ -283,9 +283,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 3: ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 @@ -295,9 +295,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 9: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]]) @@ -317,9 +317,9 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]]) @@ -338,9 +338,9 @@ define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[A0:%.*]]) @@ -361,9 +361,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) #0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 6: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1:%.*]]) @@ -382,9 +382,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16 @@ -395,9 +395,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 11: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]]) @@ -416,9 +416,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16 @@ -429,9 +429,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 11: ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]]) @@ -450,9 +450,9 @@ define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) @@ -471,9 +471,9 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) @@ -494,9 +494,9 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 3: ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 @@ -506,9 +506,9 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 9: ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]]) @@ -531,9 +531,9 @@ define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]]) @@ -552,9 +552,9 @@ define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[A0:%.*]]) @@ -639,9 +639,9 @@ define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> [[A0:%.*]]) @@ -810,9 +810,9 @@ define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> [[A0:%.*]]) @@ -866,7 +866,7 @@ define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] @@ -1139,9 +1139,9 @@ define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, ptr %p) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16 @@ -1365,9 +1365,9 @@ define void @clflush(ptr %p) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] ; CHECK: 2: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable ; CHECK: 3: ; CHECK-NEXT: tail call void @llvm.x86.sse2.clflush(ptr [[P:%.*]]) diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll index a71455821bd6bd..f5e8452fd0d585 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll @@ -11,9 +11,9 @@ define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 63) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64> @@ -39,9 +39,9 @@ define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[A2:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], splat (i32 31) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32> @@ -112,7 +112,7 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) #0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -138,7 +138,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -158,7 +158,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -173,7 +173,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 10: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -226,9 +226,9 @@ define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], splat (i8 7) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 7) ; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]] @@ -322,7 +322,7 @@ define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -343,7 +343,7 @@ define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -380,7 +380,7 @@ define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, ptr %a1) #0 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable @@ -407,7 +407,7 @@ define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, ptr %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll index 50065e25970659..5bb2c3f0e233b9 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll @@ -43,9 +43,9 @@ define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> % ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[A2:%.*]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], splat (i64 63) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[A1:%.*]] to <4 x i64> @@ -71,9 +71,9 @@ define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[A2:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], splat (i32 31) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[A1:%.*]] to <8 x i32> @@ -1363,8 +1363,8 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], -; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> , <4 x i32> +; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], splat (i64 1) +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> splat (i64 -1), <4 x i32> ; CHECK-NEXT: [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll index 6d9a78be2c2097..1602e85d8516d2 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll @@ -216,7 +216,7 @@ define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <4 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[RES]] @@ -885,9 +885,9 @@ define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], splat (i8 7) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i8> [[TMP4]] to <32 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], splat (i8 7) ; CHECK-NEXT: [[TMP7:%.*]] = trunc <32 x i8> [[TMP6]] to <32 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i8> [[A1:%.*]], [[A0:%.*]] @@ -1438,7 +1438,7 @@ define <2 x i64> @test_x86_avx2_psrlv_q_const() #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> zeroinitializer, <2 x i64> ) ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> ) +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> splat (i64 4), <2 x i64> ) ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -1471,7 +1471,7 @@ define <4 x i64> @test_x86_avx2_psrlv_q_256_const() #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> zeroinitializer, <4 x i64> ) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> ) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> splat (i64 4), <4 x i64> ) ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i64> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll index 8b8ca74f8e6dcf..9d075f7974cd91 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll @@ -866,7 +866,7 @@ define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll index 3a1e27d5a09d1c..f5e8452fd0d585 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll @@ -11,9 +11,9 @@ define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 63) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64> @@ -39,9 +39,9 @@ define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[A2:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], splat (i32 31) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32> @@ -226,9 +226,9 @@ define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], splat (i8 7) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1> -; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 7) ; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll b/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll index 24fde53eba63a9..ff37605acaddd9 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll @@ -42,7 +42,7 @@ define void @Store(ptr %p, <4 x i64> %v, <4 x i1> %mask) sanitize_memory { ; ADDR-NEXT: [[TMP6:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i4 [[TMP6]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0:![0-9]+]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1:![0-9]+]] ; ADDR: 7: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]] ; ADDR-NEXT: unreachable @@ -112,7 +112,7 @@ define <4 x double> @Load(ptr %p, <4 x double> %v, <4 x i1> %mask) sanitize_memo ; ADDR-NEXT: [[TMP6:%.*]] = bitcast <4 x i1> [[TMP1]] to i4 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i4 [[TMP6]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; ADDR: 7: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable @@ -238,7 +238,7 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <16 x ptr> [[PTRS:%.*]] to <16 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i64> [[TMP2]], splat (i64 87960930222080) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <16 x i64> [[TMP3]] to <16 x ptr> ; CHECK-NEXT: [[_MSMASKEDGATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP4]], i32 4, <16 x i1> [[MASK:%.*]], <16 x i32> [[TMP1]]) ; CHECK-NEXT: [[RET:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[PTRS]], i32 4, <16 x i1> [[MASK]], <16 x float> [[PASSTHRU:%.*]]) @@ -252,7 +252,7 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas ; ADDR-NEXT: call void @llvm.donothing() ; ADDR-NEXT: [[_MSMASKEDPTRS:%.*]] = select <16 x i1> [[MASK:%.*]], <16 x i64> [[TMP2]], <16 x i64> zeroinitializer ; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint <16 x ptr> [[PTRS:%.*]] to <16 x i64> -; ADDR-NEXT: [[TMP5:%.*]] = xor <16 x i64> [[TMP4]], +; ADDR-NEXT: [[TMP5:%.*]] = xor <16 x i64> [[TMP4]], splat (i64 87960930222080) ; ADDR-NEXT: [[TMP6:%.*]] = inttoptr <16 x i64> [[TMP5]] to <16 x ptr> ; ADDR-NEXT: [[_MSMASKEDGATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP6]], i32 4, <16 x i1> [[MASK]], <16 x i32> [[TMP3]]) ; ADDR-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[TMP1]] to i16 @@ -260,7 +260,7 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas ; ADDR-NEXT: [[TMP8:%.*]] = bitcast <16 x i64> [[_MSMASKEDPTRS]] to i1024 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i1024 [[TMP8]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; ADDR: 9: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable @@ -274,9 +274,9 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas ; ORIGINS-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 136) to ptr), align 4 ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP3:%.*]] = ptrtoint <16 x ptr> [[PTRS:%.*]] to <16 x i64> -; ORIGINS-NEXT: [[TMP4:%.*]] = xor <16 x i64> [[TMP3]], +; ORIGINS-NEXT: [[TMP4:%.*]] = xor <16 x i64> [[TMP3]], splat (i64 87960930222080) ; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr <16 x i64> [[TMP4]] to <16 x ptr> -; ORIGINS-NEXT: [[TMP6:%.*]] = add <16 x i64> [[TMP4]], +; ORIGINS-NEXT: [[TMP6:%.*]] = add <16 x i64> [[TMP4]], splat (i64 17592186044416) ; ORIGINS-NEXT: [[TMP7:%.*]] = inttoptr <16 x i64> [[TMP6]] to <16 x ptr> ; ORIGINS-NEXT: [[_MSMASKEDGATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP5]], i32 4, <16 x i1> [[MASK:%.*]], <16 x i32> [[TMP1]]) ; ORIGINS-NEXT: [[RET:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[PTRS]], i32 4, <16 x i1> [[MASK]], <16 x float> [[PASSTHRU:%.*]]) @@ -319,7 +319,7 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 87960930222080) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <8 x i64> [[TMP3]] to <8 x ptr> ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> [[MASK:%.*]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -332,7 +332,7 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize ; ADDR-NEXT: call void @llvm.donothing() ; ADDR-NEXT: [[_MSMASKEDPTRS:%.*]] = select <8 x i1> [[MASK:%.*]], <8 x i64> [[TMP2]], <8 x i64> zeroinitializer ; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; ADDR-NEXT: [[TMP5:%.*]] = xor <8 x i64> [[TMP4]], +; ADDR-NEXT: [[TMP5:%.*]] = xor <8 x i64> [[TMP4]], splat (i64 87960930222080) ; ADDR-NEXT: [[TMP6:%.*]] = inttoptr <8 x i64> [[TMP5]] to <8 x ptr> ; ADDR-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP3]], <8 x ptr> [[TMP6]], i32 8, <8 x i1> [[MASK]]) ; ADDR-NEXT: [[TMP7:%.*]] = bitcast <8 x i1> [[TMP1]] to i8 @@ -340,7 +340,7 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize ; ADDR-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[_MSMASKEDPTRS]] to i512 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] ; ADDR: 9: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable @@ -353,9 +353,9 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize ; ORIGINS-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP3:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; ORIGINS-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[TMP3]], +; ORIGINS-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[TMP3]], splat (i64 87960930222080) ; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr <8 x i64> [[TMP4]] to <8 x ptr> -; ORIGINS-NEXT: [[TMP6:%.*]] = add <8 x i64> [[TMP4]], +; ORIGINS-NEXT: [[TMP6:%.*]] = add <8 x i64> [[TMP4]], splat (i64 17592186044416) ; ORIGINS-NEXT: [[TMP7:%.*]] = inttoptr <8 x i64> [[TMP6]] to <8 x ptr> ; ORIGINS-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP5]], i32 8, <8 x i1> [[MASK:%.*]]) ; ORIGINS-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -369,7 +369,7 @@ define void @ScatterNoSanitize(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask ; CHECK-LABEL: @ScatterNoSanitize( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; CHECK-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 87960930222080) ; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <8 x i64> [[TMP2]] to <8 x ptr> ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> zeroinitializer, <8 x ptr> [[TMP3]], i32 8, <8 x i1> [[MASK:%.*]]) ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -379,7 +379,7 @@ define void @ScatterNoSanitize(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask ; ADDR-NEXT: call void @llvm.donothing() ; ADDR-NEXT: [[_MSMASKEDPTRS:%.*]] = select <8 x i1> [[MASK:%.*]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer ; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; ADDR-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], +; ADDR-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 87960930222080) ; ADDR-NEXT: [[TMP3:%.*]] = inttoptr <8 x i64> [[TMP2]] to <8 x ptr> ; ADDR-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> zeroinitializer, <8 x ptr> [[TMP3]], i32 8, <8 x i1> [[MASK]]) ; ADDR-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -388,9 +388,9 @@ define void @ScatterNoSanitize(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask ; ORIGINS-LABEL: @ScatterNoSanitize( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64> -; ORIGINS-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], +; ORIGINS-NEXT: [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 87960930222080) ; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr <8 x i64> [[TMP2]] to <8 x ptr> -; ORIGINS-NEXT: [[TMP4:%.*]] = add <8 x i64> [[TMP2]], +; ORIGINS-NEXT: [[TMP4:%.*]] = add <8 x i64> [[TMP2]], splat (i64 17592186044416) ; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr <8 x i64> [[TMP4]] to <8 x ptr> ; ORIGINS-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> zeroinitializer, <8 x ptr> [[TMP3]], i32 8, <8 x i1> [[MASK:%.*]]) ; ORIGINS-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]]) @@ -426,7 +426,7 @@ define <16 x float> @ExpandLoad(ptr %ptr, <16 x i1> %mask, <16 x float> %passthr ; ADDR-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[TMP2]] to i16 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP7]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ADDR: 8: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable @@ -504,7 +504,7 @@ define void @CompressStore(<16 x float> %value, ptr %ptr, <16 x i1> %mask) sanit ; ADDR-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[TMP2]] to i16 ; ADDR-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP7]], 0 ; ADDR-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; ADDR-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ADDR: 8: ; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] ; ADDR-NEXT: unreachable diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll index 8746f7f19023e4..3ce200e2222df1 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll @@ -2086,12 +2086,12 @@ define <2 x i1> @ICmpSLT_vector_Zero(<2 x ptr> %x) nounwind uwtable readnone san ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <2 x ptr> [[X]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <2 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <2 x i64> [[TMP5]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <2 x i64> [[TMP6]], splat (i64 -9223372036854775808) ; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP9]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <2 x ptr> [[X]], zeroinitializer ; CHECK-NEXT: store <2 x i1> [[TMP17]], ptr @__msan_retval_tls, align 8 @@ -2103,12 +2103,12 @@ define <2 x i1> @ICmpSLT_vector_Zero(<2 x ptr> %x) nounwind uwtable readnone san ; ORIGIN-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; ORIGIN-NEXT: call void @llvm.donothing() ; ORIGIN-NEXT: [[TMP3:%.*]] = ptrtoint <2 x ptr> [[X]] to <2 x i64> -; ORIGIN-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP3]], -; ORIGIN-NEXT: [[TMP5:%.*]] = xor <2 x i64> [[TMP1]], +; ORIGIN-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -9223372036854775808) +; ORIGIN-NEXT: [[TMP5:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; ORIGIN-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP4]], [[TMP5]] ; ORIGIN-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP4]], [[TMP1]] -; ORIGIN-NEXT: [[TMP10:%.*]] = icmp ult <2 x i64> [[TMP6]], -; ORIGIN-NEXT: [[TMP17:%.*]] = icmp ult <2 x i64> [[TMP7]], +; ORIGIN-NEXT: [[TMP10:%.*]] = icmp ult <2 x i64> [[TMP6]], splat (i64 -9223372036854775808) +; ORIGIN-NEXT: [[TMP17:%.*]] = icmp ult <2 x i64> [[TMP7]], splat (i64 -9223372036854775808) ; ORIGIN-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[TMP10]], [[TMP17]] ; ORIGIN-NEXT: [[TMP19:%.*]] = icmp slt <2 x ptr> [[X]], zeroinitializer ; ORIGIN-NEXT: store <2 x i1> [[TMP18]], ptr @__msan_retval_tls, align 8 @@ -2121,12 +2121,12 @@ define <2 x i1> @ICmpSLT_vector_Zero(<2 x ptr> %x) nounwind uwtable readnone san ; CALLS-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; CALLS-NEXT: call void @llvm.donothing() ; CALLS-NEXT: [[TMP3:%.*]] = ptrtoint <2 x ptr> [[X]] to <2 x i64> -; CALLS-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP3]], -; CALLS-NEXT: [[TMP5:%.*]] = xor <2 x i64> [[TMP1]], +; CALLS-NEXT: [[TMP4:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -9223372036854775808) +; CALLS-NEXT: [[TMP5:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CALLS-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP4]], [[TMP5]] ; CALLS-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP4]], [[TMP1]] -; CALLS-NEXT: [[TMP10:%.*]] = icmp ult <2 x i64> [[TMP6]], -; CALLS-NEXT: [[TMP17:%.*]] = icmp ult <2 x i64> [[TMP7]], +; CALLS-NEXT: [[TMP10:%.*]] = icmp ult <2 x i64> [[TMP6]], splat (i64 -9223372036854775808) +; CALLS-NEXT: [[TMP17:%.*]] = icmp ult <2 x i64> [[TMP7]], splat (i64 -9223372036854775808) ; CALLS-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[TMP10]], [[TMP17]] ; CALLS-NEXT: [[TMP19:%.*]] = icmp slt <2 x ptr> [[X]], zeroinitializer ; CALLS-NEXT: store <2 x i1> [[TMP18]], ptr @__msan_retval_tls, align 8 @@ -2146,14 +2146,14 @@ define <2 x i1> @ICmpSLT_vector_AllOnes(<2 x i32> %x) nounwind uwtable readnone ; CHECK-SAME: <2 x i32> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult <2 x i32> , [[TMP5]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <2 x i32> , [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP4]] ; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP8]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <2 x i32> , [[X]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <2 x i32> splat (i32 -1), [[X]] ; CHECK-NEXT: store <2 x i1> [[TMP16]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i1> [[TMP17]] ; @@ -2162,17 +2162,17 @@ define <2 x i1> @ICmpSLT_vector_AllOnes(<2 x i32> %x) nounwind uwtable readnone ; ORIGIN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; ORIGIN-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; ORIGIN-NEXT: call void @llvm.donothing() -; ORIGIN-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[X]], -; ORIGIN-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], +; ORIGIN-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[X]], splat (i32 -2147483648) +; ORIGIN-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; ORIGIN-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP3]], [[TMP4]] ; ORIGIN-NEXT: [[TMP6:%.*]] = or <2 x i32> [[TMP3]], [[TMP1]] -; ORIGIN-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> , [[TMP6]] -; ORIGIN-NEXT: [[TMP16:%.*]] = icmp ult <2 x i32> , [[TMP5]] +; ORIGIN-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP6]] +; ORIGIN-NEXT: [[TMP16:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP5]] ; ORIGIN-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP9]], [[TMP16]] ; ORIGIN-NEXT: [[TMP18:%.*]] = bitcast <2 x i32> [[TMP1]] to i64 ; ORIGIN-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0 ; ORIGIN-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 0 -; ORIGIN-NEXT: [[TMP21:%.*]] = icmp slt <2 x i32> , [[X]] +; ORIGIN-NEXT: [[TMP21:%.*]] = icmp slt <2 x i32> splat (i32 -1), [[X]] ; ORIGIN-NEXT: store <2 x i1> [[TMP17]], ptr @__msan_retval_tls, align 8 ; ORIGIN-NEXT: store i32 [[TMP20]], ptr @__msan_retval_origin_tls, align 4 ; ORIGIN-NEXT: ret <2 x i1> [[TMP21]] @@ -2182,17 +2182,17 @@ define <2 x i1> @ICmpSLT_vector_AllOnes(<2 x i32> %x) nounwind uwtable readnone ; CALLS-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 ; CALLS-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4 ; CALLS-NEXT: call void @llvm.donothing() -; CALLS-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[X]], -; CALLS-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], +; CALLS-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[X]], splat (i32 -2147483648) +; CALLS-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CALLS-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP3]], [[TMP4]] ; CALLS-NEXT: [[TMP6:%.*]] = or <2 x i32> [[TMP3]], [[TMP1]] -; CALLS-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> , [[TMP6]] -; CALLS-NEXT: [[TMP16:%.*]] = icmp ult <2 x i32> , [[TMP5]] +; CALLS-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP6]] +; CALLS-NEXT: [[TMP16:%.*]] = icmp ult <2 x i32> splat (i32 2147483647), [[TMP5]] ; CALLS-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP9]], [[TMP16]] ; CALLS-NEXT: [[TMP18:%.*]] = bitcast <2 x i32> [[TMP1]] to i64 ; CALLS-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0 ; CALLS-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 0 -; CALLS-NEXT: [[TMP21:%.*]] = icmp slt <2 x i32> , [[X]] +; CALLS-NEXT: [[TMP21:%.*]] = icmp slt <2 x i32> splat (i32 -1), [[X]] ; CALLS-NEXT: store <2 x i1> [[TMP17]], ptr @__msan_retval_tls, align 8 ; CALLS-NEXT: store i32 [[TMP20]], ptr @__msan_retval_origin_tls, align 4 ; CALLS-NEXT: ret <2 x i1> [[TMP21]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/reduce.ll b/llvm/test/Instrumentation/MemorySanitizer/reduce.ll index 669ccf97f74c3e..54e87b9e10a509 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/reduce.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/reduce.ll @@ -74,7 +74,7 @@ define i32 @reduce_or() sanitize_memory { ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <3 x i32>, ptr [[TMP3]], align 16 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = xor <3 x i32> [[O]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <3 x i32> [[O]], splat (i32 -1) ; CHECK-NEXT: [[TMP8:%.*]] = or <3 x i32> [[TMP7]], [[_MSLD]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.or.v3i32(<3 x i32> [[_MSLD]]) diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll index 98a9cdc6960494..05d4d2a6551f5e 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -S -passes="msan" -msan-instrumentation-with-call-threshold=0 | FileCheck %s ; ; This test illustrates a bug in MemorySanitizer that will shortly be fixed @@ -16,7 +16,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[DOTPRE:%.*]] = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[DOTPRE:%.*]] = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa [[TBAA1:![0-9]+]] ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr inttoptr (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576) to ptr), align 8 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576), i64 35184372088832) to ptr), align 8 ; CHECK-NEXT: br label %[[FOR_COND:.*]] @@ -24,10 +24,10 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-NEXT: [[_MSPHI_S:%.*]] = phi <4 x i16> [ [[_MSLD]], %[[ENTRY]] ], [ [[_MSLD3:%.*]], %[[FOR_COND]] ] ; CHECK-NEXT: [[_MSPHI_O:%.*]] = phi i32 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP11:%.*]], %[[FOR_COND]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x i16> [ [[DOTPRE]], %[[ENTRY]] ], [ [[TMP5:%.*]], %[[FOR_COND]] ] -; CHECK-NEXT: [[_MSPHI_S1:%.*]] = phi <4 x i16> [ , %[[ENTRY]] ], [ [[_MSLD3]], %[[FOR_COND]] ] +; CHECK-NEXT: [[_MSPHI_S1:%.*]] = phi <4 x i16> [ splat (i16 -1), %[[ENTRY]] ], [ [[_MSLD3]], %[[FOR_COND]] ] ; CHECK-NEXT: [[_MSPHI_O2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP11]], %[[FOR_COND]] ] ; CHECK-NEXT: [[E_0:%.*]] = phi <4 x i16> [ undef, %[[ENTRY]] ], [ [[TMP5]], %[[FOR_COND]] ] -; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSPHI_S1]], <4 x i16> , <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSPHI_S1]], <4 x i16> splat (i16 -1), <4 x i32> ; CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[E_0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[_MSPHI_S]] to i64 ; CHECK-NEXT: call void @__msan_maybe_warning_8(i64 zeroext [[TMP2]], i32 zeroext [[_MSPHI_O]]) @@ -36,7 +36,7 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-NEXT: [[CALL:%.*]] = tail call noundef i32 @_Z1b11__Int16x4_tS_(<4 x i16> noundef [[TMP1]], <4 x i16> noundef [[LANE]]) ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[CALL]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[CONV]] to ptr -; CHECK-NEXT: [[TMP5]] = load <4 x i16>, ptr [[TMP4]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[TMP5]] = load <4 x i16>, ptr [[TMP4]], align 8, !tbaa [[TBAA1]] ; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr @@ -47,8 +47,8 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 { ; CHECK-NEXT: store <4 x i16> [[_MSLD3]], ptr inttoptr (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576) to ptr), align 8 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[_MSLD3]] to i64 ; CHECK-NEXT: call void @__msan_maybe_store_origin_8(i64 zeroext [[TMP12]], ptr @_Z1cv, i32 zeroext [[TMP11]]) -; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr @_Z1cv, align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr @_Z1cv, align 8, !tbaa [[TBAA1]] +; CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] ; entry: %.pre = load <4 x i16>, ptr @_Z1cv, align 8, !tbaa !2 @@ -76,9 +76,9 @@ attributes #0 = { mustprogress noreturn nounwind sanitize_memory "no-trapping-ma !5 = distinct !{!5, !6} !6 = !{!"llvm.loop.mustprogress"} ;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} -; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0} -; CHECK: [[META2]] = !{!"Simple C++ TBAA"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} -; CHECK: [[META4]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C++ TBAA"} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]} +; CHECK: [[META5]] = !{!"llvm.loop.mustprogress"} ;. diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll index 0e2c0e3d859415..d614bb85d85849 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll @@ -65,7 +65,7 @@ define <2 x i64> @Test_x86_sse2_psad_bw(<16 x i8> %a, <16 x i8> %b) sanitize_mem ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48) ; CHECK-NEXT: [[C:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A]], <16 x i8> [[B]]) ; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[C]] diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll index 2131162bf4bf3f..03c5e917f07650 100644 --- a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll @@ -134,8 +134,8 @@ define <2 x float> @return_param_add_return_float_vector(<2 x float> %a) sanitiz ; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[A:%.*]] to <2 x double> ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]] ; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 -; CHECK-NEXT: [[B:%.*]] = fadd <2 x float> [[A]], -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], +; CHECK-NEXT: [[B:%.*]] = fadd <2 x float> [[A]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[B]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP6]], double [[TMP7]], i32 1, i64 0) @@ -396,8 +396,8 @@ define void @load_add_store_vector(<2 x float>* %a) sanitize_numerical_stability ; CHECK-NEXT: br label [[TMP6]] ; CHECK: 6: ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x double> [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ] -; CHECK-NEXT: [[C:%.*]] = fadd <2 x float> [[B]], -; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], +; CHECK-NEXT: [[C:%.*]] = fadd <2 x float> [[B]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[A]], i64 2) ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[C]], i64 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP8]], i64 0 @@ -772,8 +772,8 @@ define void @vector_shuffle(<2 x float> %0) sanitize_numerical_stability { ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double> ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]] ; CHECK-NEXT: store i64 0, ptr @__nsan_shadow_args_tag, align 8 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> , <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> , <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> splat (float 1.000000e+00), <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> splat (double 1.000000e+00), <2 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll index f26463815c16d6..418ad0da26a95f 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll @@ -200,8 +200,8 @@ define i32 @f16_i8(half %in) { define <2 x i64> @v2f32_i32(<2 x float> %in) { ; CHECK-BASE-LABEL: @v2f32_i32( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> splat (i64 2147483647)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> splat (i64 -2147483648)) ; CHECK-BASE-NEXT: ret <2 x i64> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v2f32_i32( @@ -223,8 +223,8 @@ define <2 x i64> @v2f32_i32(<2 x float> %in) { define <4 x i64> @v4f32_i32(<4 x float> %in) { ; CHECK-BASE-LABEL: @v4f32_i32( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> splat (i64 2147483647)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> splat (i64 -2147483648)) ; CHECK-BASE-NEXT: ret <4 x i64> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v4f32_i32( @@ -246,8 +246,8 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) { define <8 x i64> @v8f32_i32(<8 x float> %in) { ; CHECK-BASE-LABEL: @v8f32_i32( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> splat (i64 2147483647)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> splat (i64 -2147483648)) ; CHECK-BASE-NEXT: ret <8 x i64> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v8f32_i32( @@ -269,8 +269,8 @@ define <8 x i64> @v8f32_i32(<8 x float> %in) { define <4 x i32> @v4f16_i16(<4 x half> %in) { ; CHECK-BASE-LABEL: @v4f16_i16( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> splat (i32 32767)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> splat (i32 -32768)) ; CHECK-BASE-NEXT: ret <4 x i32> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v4f16_i16( @@ -292,8 +292,8 @@ define <4 x i32> @v4f16_i16(<4 x half> %in) { define <8 x i32> @v8f16_i16(<8 x half> %in) { ; CHECK-BASE-LABEL: @v8f16_i16( ; CHECK-BASE-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-BASE-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> splat (i32 32767)) +; CHECK-BASE-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> splat (i32 -32768)) ; CHECK-BASE-NEXT: ret <8 x i32> [[MAX]] ; ; CHECK-MVEFP-LABEL: @v8f16_i16( diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll index 4050da245c88df..252f31ec687476 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll @@ -159,8 +159,8 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) { define <8 x i32> @v8f16_i16(<8 x half> %in) { ; CHECK-LABEL: @v8f16_i16( ; CHECK-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) +; CHECK-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> splat (i32 32767)) +; CHECK-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> splat (i32 -32768)) ; CHECK-NEXT: ret <8 x i32> [[MAX]] ; %conv = fptosi <8 x half> %in to <8 x i32> diff --git a/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll b/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll index 9eb7ef1a1d46ee..5c914deb089671 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll @@ -18,7 +18,7 @@ define i32 @anyset_two_bit_mask(i32 %x) { define <2 x i32> @anyset_two_bit_mask_uniform(<2 x i32> %x) { ; CHECK-LABEL: @anyset_two_bit_mask_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 9) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] @@ -48,7 +48,7 @@ define i32 @anyset_four_bit_mask(i32 %x) { define <2 x i32> @anyset_four_bit_mask_uniform(<2 x i32> %x) { ; CHECK-LABEL: @anyset_four_bit_mask_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 297) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] @@ -83,7 +83,7 @@ define i32 @anyset_three_bit_mask_all_shifted_bits(i32 %x) { define <2 x i32> @anyset_three_bit_mask_all_shifted_bits_uniform(<2 x i32> %x) { ; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 296) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] @@ -114,8 +114,8 @@ define i32 @allset_two_bit_mask(i32 %x) { define <2 x i32> @allset_two_bit_mask_uniform(<2 x i32> %x) { ; CHECK-LABEL: @allset_two_bit_mask_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 129) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 129) ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll index f6369a95bccf92..4a89705e17749c 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll @@ -19,7 +19,7 @@ define signext i32 @popcount8(i8 zeroext %0) { ; CHECK-NEXT: [[TMP9:%.*]] = lshr i8 [[TMP8]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i8 [[TMP9]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 15 -; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: ret i32 [[TMP12]] ; %2 = lshr i8 %0, 1 @@ -32,8 +32,8 @@ define signext i32 @popcount8(i8 zeroext %0) { %9 = lshr i8 %8, 4 %10 = add nuw nsw i8 %9, %8 %11 = and i8 %10, 15 - %12 = zext i8 %11 to i32 - ret i32 %12 + %12 = zext i8 %11 to i32 + ret i32 %12 } ;int popcount32(unsigned i) { @@ -98,7 +98,7 @@ define signext i32 @popcount64(i64 %0) { ; y <<= 64; ; y |= 0x3333333333333333; ; __uint128_t z = 0x0f0f0f0f0f0f0f0f; -; z <<= 64; +; z <<= 64; ; z |= 0x0f0f0f0f0f0f0f0f; ; __uint128_t a = 0x0101010101010101; ; a <<= 64; @@ -112,7 +112,7 @@ define signext i32 @popcount64(i64 %0) { define signext i32 @popcount128(i128 %0) { ; CHECK-LABEL: @popcount128( ; CHECK-NEXT: [[TMP2:%.*]] = call i128 @llvm.ctpop.i128(i128 [[TMP0:%.*]]) -; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32 ; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = lshr i128 %0, 1 @@ -126,9 +126,9 @@ define signext i32 @popcount128(i128 %0) { %10 = add nuw nsw i128 %9, %8 %11 = and i128 %10, 20016609818878733144904388672456953615 %12 = mul i128 %11, 1334440654591915542993625911497130241 - %13 = lshr i128 %12, 120 - %14 = trunc i128 %13 to i32 - ret i32 %14 + %13 = lshr i128 %12, 120 + %14 = trunc i128 %13 to i32 + ret i32 %14 } ;vector unsigned char popcount8vec(vector unsigned char i) @@ -140,16 +140,16 @@ define signext i32 @popcount128(i128 %0) { ;} define <16 x i8> @popcount8vec(<16 x i8> %0) { ; CHECK-LABEL: @popcount8vec( -; CHECK-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[TMP0:%.*]], splat (i8 1) +; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], splat (i8 85) ; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i8> [[TMP0]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i8> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i8> [[TMP6]], +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i8> [[TMP4]], splat (i8 51) +; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], splat (i8 2) +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i8> [[TMP6]], splat (i8 51) ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i8> [[TMP7]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = lshr <16 x i8> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = lshr <16 x i8> [[TMP8]], splat (i8 4) ; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw <16 x i8> [[TMP9]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP10]], splat (i8 15) ; CHECK-NEXT: ret <16 x i8> [[TMP11]] ; %2 = lshr <16 x i8> %0, diff --git a/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll b/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll index b54963eda9d1f4..0a5c710a8b70ab 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll @@ -116,7 +116,7 @@ define void @multi_uses_sub(i32 %X, i32 %Y) { define void @multi_use_vec_add(<2 x i32> %X) { ; CHECK-LABEL: @multi_use_vec_add( ; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[B1:%.*]] = add <2 x i32> [[X]], +; CHECK-NEXT: [[B1:%.*]] = add <2 x i32> [[X]], splat (i32 15) ; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]]) @@ -135,7 +135,7 @@ define void @multi_use_vec_add(<2 x i32> %X) { define void @multi_use_vec_or(<2 x i32> %X) { ; CHECK-LABEL: @multi_use_vec_or( ; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[B1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[B1:%.*]] = or <2 x i32> [[X]], splat (i32 15) ; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]]) @@ -154,7 +154,7 @@ define void @multi_use_vec_or(<2 x i32> %X) { define void @multi_use_vec_xor(<2 x i32> %X) { ; CHECK-LABEL: @multi_use_vec_xor( ; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[B1:%.*]] = xor <2 x i32> [[X]], +; CHECK-NEXT: [[B1:%.*]] = xor <2 x i32> [[X]], splat (i32 15) ; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]]) @@ -173,7 +173,7 @@ define void @multi_use_vec_xor(<2 x i32> %X) { define void @multi_use_vec_and(<2 x i32> %X) { ; CHECK-LABEL: @multi_use_vec_and( ; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[B1:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[B1:%.*]] = and <2 x i32> [[X]], splat (i32 15) ; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]] ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]]) diff --git a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll index 170fde4ef7002e..65595962001a43 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll @@ -11,7 +11,7 @@ define <8 x i16> @or-load-fixed-length-vector(ptr %p1) { ; CHECK-NEXT: [[L2:%.*]] = load <8 x i8>, ptr [[P2]], align 1 ; CHECK-NEXT: [[E1:%.*]] = zext <8 x i8> [[L1]] to <8 x i16> ; CHECK-NEXT: [[E2:%.*]] = zext <8 x i8> [[L2]] to <8 x i16> -; CHECK-NEXT: [[S2:%.*]] = shl <8 x i16> [[E2]], +; CHECK-NEXT: [[S2:%.*]] = shl <8 x i16> [[E2]], splat (i16 8) ; CHECK-NEXT: [[OR:%.*]] = or <8 x i16> [[E1]], [[S2]] ; CHECK-NEXT: ret <8 x i16> [[OR]] ; diff --git a/llvm/test/Transforms/Attributor/nofpclass-powi.ll b/llvm/test/Transforms/Attributor/nofpclass-powi.ll index 176f770db0a584..1d59907a34f19a 100644 --- a/llvm/test/Transforms/Attributor/nofpclass-powi.ll +++ b/llvm/test/Transforms/Attributor/nofpclass-powi.ll @@ -104,7 +104,7 @@ define float @ret_powi_f32_masked_to_even_extrabits(float %arg0, i32 %arg1) #0 { define <2 x float> @ret_powi_v2f32_masked_to_even(<2 x float> %arg0, <2 x i32> %arg1) #0 { ; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) <2 x float> @ret_powi_v2f32_masked_to_even ; CHECK-SAME: (<2 x float> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[KNOWN_EVEN:%.*]] = and <2 x i32> [[ARG1]], +; CHECK-NEXT: [[KNOWN_EVEN:%.*]] = and <2 x i32> [[ARG1]], splat (i32 -2) ; CHECK-NEXT: [[CALL:%.*]] = call nofpclass(ninf nzero nsub nnorm) <2 x float> @llvm.powi.v2f32.v2i32(<2 x float> [[ARG0]], <2 x i32> [[KNOWN_EVEN]]) #[[ATTR6]] ; CHECK-NEXT: ret <2 x float> [[CALL]] ; @@ -277,7 +277,7 @@ define <4 x float> @powi_v4f32_i32_regression(<4 x float> %arg) { ; CHECK-LABEL: define nofpclass(nzero) <4 x float> @powi_v4f32_i32_regression ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR5]] { ; CHECK-NEXT: [[POWI:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[ARG]], i32 noundef 4) #[[ATTR6]] -; CHECK-NEXT: [[USER:%.*]] = fsub <4 x float> [[POWI]], +; CHECK-NEXT: [[USER:%.*]] = fsub <4 x float> [[POWI]], splat (float 1.000000e+00) ; CHECK-NEXT: ret <4 x float> [[USER]] ; %powi = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %arg, i32 4) diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index 2a6780b60211cf..5afe5e90d802bc 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -96,7 +96,7 @@ define <2 x double> @returned_zero_vector() { define <2 x double> @returned_negzero_vector() { ; CHECK-LABEL: define noundef nofpclass(nan inf pzero sub norm) <2 x double> @returned_negzero_vector() { ; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double -0.000000e+00) ; call void @unknown() ret <2 x double> @@ -2909,7 +2909,7 @@ define <2 x float> @bitcast_to_float_vect_nnan(<2 x i32> %arg) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) <2 x float> @bitcast_to_float_vect_nnan ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[ARG]], splat (i32 4) ; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[SHR]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[CAST]] ; @@ -2922,7 +2922,7 @@ define <2 x float> @bitcast_to_float_vect_sign_1(<2 x i32> %arg) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) <2 x float> @bitcast_to_float_vect_sign_1 ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], splat (i32 -2147483648) ; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[CAST]] ; @@ -2935,7 +2935,7 @@ define <2 x float> @bitcast_to_float_vect_nan(<2 x i32> %arg) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define nofpclass(inf zero sub norm) <2 x float> @bitcast_to_float_vect_nan ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], splat (i32 2139095041) ; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[CAST]] ; diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll index 70793ec5c7f833..24bf4938ff2d4a 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll @@ -60,9 +60,9 @@ define i32 @vec_write_3() { ; CHECK-LABEL: define {{[^@]+}}@vec_write_3 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[A:%.*]] = alloca <4 x i32>, align 16 -; CHECK-NEXT: store <2 x i32> , ptr [[A]], align 16 +; CHECK-NEXT: store <2 x i32> splat (i32 3), ptr [[A]], align 16 ; CHECK-NEXT: [[G:%.*]] = getelementptr i32, ptr [[A]], i64 1 -; CHECK-NEXT: store <2 x i32> , ptr [[G]], align 8 +; CHECK-NEXT: store <2 x i32> splat (i32 5), ptr [[G]], align 8 ; CHECK-NEXT: [[J:%.*]] = getelementptr i32, ptr [[G]], i64 1 ; CHECK-NEXT: [[L2B:%.*]] = load i32, ptr [[G]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = add i32 3, [[L2B]] diff --git a/llvm/test/Transforms/BDCE/binops-multiuse.ll b/llvm/test/Transforms/BDCE/binops-multiuse.ll index 0c03ca4d6fc57e..950a01bcf33ef5 100644 --- a/llvm/test/Transforms/BDCE/binops-multiuse.ll +++ b/llvm/test/Transforms/BDCE/binops-multiuse.ll @@ -243,8 +243,8 @@ define void @select_vectorized(i1 %c, <2 x i8> %a, <2 x i8> %b) { ; CHECK-SAME: i1 [[C:%.*]], <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], <2 x i8> [[A]], <2 x i8> [[B]] -; CHECK-NEXT: [[RET1:%.*]] = and <2 x i8> [[S]], -; CHECK-NEXT: [[RET2:%.*]] = and <2 x i8> [[S]], +; CHECK-NEXT: [[RET1:%.*]] = and <2 x i8> [[S]], splat (i8 4) +; CHECK-NEXT: [[RET2:%.*]] = and <2 x i8> [[S]], splat (i8 12) ; CHECK-NEXT: call void @use3(<2 x i8> [[RET1]]) ; CHECK-NEXT: call void @use3(<2 x i8> [[RET2]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/BDCE/dead-uses.ll b/llvm/test/Transforms/BDCE/dead-uses.ll index 85ee0dd8f2b904..5c1677e59a8b88 100644 --- a/llvm/test/Transforms/BDCE/dead-uses.ll +++ b/llvm/test/Transforms/BDCE/dead-uses.ll @@ -27,11 +27,11 @@ define i32 @pr39771_fshr_multi_use_instr(i32 %a) { ; First fshr operand is dead (vector variant). define <2 x i32> @pr39771_fshr_multi_use_instr_vec(<2 x i32> %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr_vec( -; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> ) -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 2) +; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> splat (i32 1)) +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 23) ; CHECK-NEXT: [[D:%.*]] = xor <2 x i32> [[C]], [[B]] -; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[D]], +; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[D]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[E]] ; %x = or <2 x i32> %a, diff --git a/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll b/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll index ccf979d4fa12a2..8527e481600937 100644 --- a/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll +++ b/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll @@ -5,11 +5,11 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test_basic( -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, -; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) +; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1) +; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8) ; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]] -; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[C]], +; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[C]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %a2 = add <2 x i32> %a, @@ -24,9 +24,9 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) { ; Going vector -> scalar define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test_extractelement( -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, -; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) +; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1) +; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8) ; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]] ; CHECK-NEXT: [[D:%.*]] = extractelement <2 x i32> [[C]], i32 0 ; CHECK-NEXT: [[E:%.*]] = ashr i32 [[D]], 3 @@ -45,12 +45,12 @@ define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) { ; Going scalar -> vector define <2 x i32> @test_insertelement(i32 %a, i32 %b) { ; CHECK-LABEL: @test_insertelement( -; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> zeroinitializer, +; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) ; CHECK-NEXT: [[Y:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 ; CHECK-NEXT: [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A:%.*]], i32 1 -; CHECK-NEXT: [[Y3:%.*]] = and <2 x i32> [[Y2]], +; CHECK-NEXT: [[Y3:%.*]] = and <2 x i32> [[Y2]], splat (i32 8) ; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], [[Y3]] -; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], +; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[U]] ; %x = insertelement <2 x i32> poison, i32 %a, i32 0 @@ -67,12 +67,12 @@ define <2 x i32> @test_insertelement(i32 %a, i32 %b) { ; Some non-int vectors and conversions define <2 x i32> @test_conversion(<2 x i32> %a) { ; CHECK-LABEL: @test_conversion( -; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> [[A2]], +; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 1) +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> [[A2]], splat (i32 2) ; CHECK-NEXT: [[X:%.*]] = uitofp <2 x i32> [[A3]] to <2 x double> -; CHECK-NEXT: [[Y:%.*]] = fadd <2 x double> [[X]], +; CHECK-NEXT: [[Y:%.*]] = fadd <2 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[Z:%.*]] = fptoui <2 x double> [[Y]] to <2 x i32> -; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], +; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[U]] ; %a2 = add <2 x i32> %a, @@ -88,7 +88,7 @@ define <2 x i32> @test_conversion(<2 x i32> %a) { define <2 x i1> @test_assumption_invalidation(<2 x i1> %b, <2 x i8> %x) { ; CHECK-LABEL: @test_assumption_invalidation( ; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> -; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, +; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, splat (i8 1) ; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> [[BIG_NUMBER]], [[LITTLE_NUMBER]] ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i8> [[SUB]] to <2 x i1> ; CHECK-NEXT: ret <2 x i1> [[TRUNC]] diff --git a/llvm/test/Transforms/BDCE/vectors.ll b/llvm/test/Transforms/BDCE/vectors.ll index ba5ac9da63b0b4..3c50f9ca5976ed 100644 --- a/llvm/test/Transforms/BDCE/vectors.ll +++ b/llvm/test/Transforms/BDCE/vectors.ll @@ -5,11 +5,11 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test_basic( -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, -; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) +; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1) +; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8) ; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]] -; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[C]], +; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[C]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %a2 = add <2 x i32> %a, @@ -24,9 +24,9 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) { ; Going vector -> scalar define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test_extractelement( -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, -; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], -; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) +; CHECK-NEXT: [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1) +; CHECK-NEXT: [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8) ; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[A3]], [[B3]] ; CHECK-NEXT: [[D:%.*]] = extractelement <2 x i32> [[C]], i32 0 ; CHECK-NEXT: [[E:%.*]] = ashr i32 [[D]], 3 @@ -45,12 +45,12 @@ define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) { ; Going scalar -> vector define <2 x i32> @test_insertelement(i32 %a, i32 %b) { ; CHECK-LABEL: @test_insertelement( -; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> zeroinitializer, +; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4) ; CHECK-NEXT: [[Y:%.*]] = insertelement <2 x i32> undef, i32 [[B:%.*]], i32 0 ; CHECK-NEXT: [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A:%.*]], i32 1 -; CHECK-NEXT: [[Y3:%.*]] = and <2 x i32> [[Y2]], +; CHECK-NEXT: [[Y3:%.*]] = and <2 x i32> [[Y2]], splat (i32 8) ; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], [[Y3]] -; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], +; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[U]] ; %x = insertelement <2 x i32> undef, i32 %a, i32 0 @@ -67,12 +67,12 @@ define <2 x i32> @test_insertelement(i32 %a, i32 %b) { ; Some non-int vectors and conversions define <2 x i32> @test_conversion(<2 x i32> %a) { ; CHECK-LABEL: @test_conversion( -; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> [[A2]], +; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 1) +; CHECK-NEXT: [[A3:%.*]] = and <2 x i32> [[A2]], splat (i32 2) ; CHECK-NEXT: [[X:%.*]] = uitofp <2 x i32> [[A3]] to <2 x double> -; CHECK-NEXT: [[Y:%.*]] = fadd <2 x double> [[X]], +; CHECK-NEXT: [[Y:%.*]] = fadd <2 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[Z:%.*]] = fptoui <2 x double> [[Y]] to <2 x i32> -; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], +; CHECK-NEXT: [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[U]] ; %a2 = add <2 x i32> %a, @@ -88,7 +88,7 @@ define <2 x i32> @test_conversion(<2 x i32> %a) { define <2 x i1> @test_assumption_invalidation(<2 x i1> %b, <2 x i8> %x) { ; CHECK-LABEL: @test_assumption_invalidation( ; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8> -; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, +; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, splat (i8 1) ; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> [[BIG_NUMBER]], [[LITTLE_NUMBER]] ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i8> [[SUB]] to <2 x i1> ; CHECK-NEXT: ret <2 x i1> [[TRUNC]] diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll index 33d18d0e2a795b..7abc32e4f1cd85 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll @@ -327,10 +327,10 @@ define void @simple_urem_to_sel_vec(<2 x i64> %rem_amt) nounwind { ; CHECK-NEXT: [[REM:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[I_04:%.*]] = phi <2 x i64> [ [[INC:%.*]], %[[FOR_BODY]] ], [ zeroinitializer, %[[ENTRY]] ] ; CHECK-NEXT: tail call void @use.2xi64(<2 x i64> [[REM]]) -; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i64> [[REM]], +; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i64> [[REM]], splat (i64 1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], [[REM_AMT]] ; CHECK-NEXT: [[TMP3]] = select <2 x i1> [[TMP2]], <2 x i64> zeroinitializer, <2 x i64> [[TMP1]] -; CHECK-NEXT: [[INC]] = add nuw <2 x i64> [[I_04]], +; CHECK-NEXT: [[INC]] = add nuw <2 x i64> [[I_04]], splat (i64 1) ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = call i1 @get.i1() ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll index 124ce321e1819d..6ef3400812fc8e 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll @@ -13,7 +13,7 @@ target triple = "x86_64-unknown-linux-gnu" define <4 x i32> @splat_base(ptr %base, <4 x i64> %index) { ; CHECK-LABEL: @splat_base( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i64> [[INDEX:%.*]] -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x ptr> poison, ptr %base, i32 0 @@ -27,7 +27,7 @@ define <4 x i32> @splat_struct(ptr %base) { ; CHECK-LABEL: @splat_struct( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], ptr [[BASE:%.*]], i64 0, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %gep = getelementptr %struct.a, ptr %base, <4 x i64> zeroinitializer, i32 1 @@ -39,7 +39,7 @@ define <4 x i32> @scalar_index(ptr %base, i64 %index) { ; CHECK-LABEL: @scalar_index( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x ptr> poison, ptr %base, i32 0 @@ -53,7 +53,7 @@ define <4 x i32> @splat_index(ptr %base, i64 %index) { ; CHECK-LABEL: @splat_index( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %index, i32 0 @@ -66,7 +66,7 @@ define <4 x i32> @splat_index(ptr %base, i64 %index) { define <4 x i32> @test_global_array(<4 x i64> %indxs) { ; CHECK-LABEL: @test_global_array( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr @glob_array, <4 x i64> [[INDXS:%.*]] -; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[G]] ; %p = getelementptr inbounds [16 x i32], ptr @glob_array, i64 0, <4 x i64> %indxs @@ -76,7 +76,7 @@ define <4 x i32> @test_global_array(<4 x i64> %indxs) { define <4 x i32> @global_struct_splat() { ; CHECK-LABEL: @global_struct_splat( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> , i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> , i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x ptr> poison, ptr @c, i32 0 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll index 6c9c844a4ebdb6..83287083930299 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll @@ -12,7 +12,7 @@ target triple = "x86_64-unknown-linux-gnu" define <4 x i32> @splat_base(ptr %base, <4 x i64> %index) { ; CHECK-LABEL: @splat_base( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i64> [[INDEX:%.*]] -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x ptr> undef, ptr %base, i32 0 @@ -26,7 +26,7 @@ define <4 x i32> @splat_struct(ptr %base) { ; CHECK-LABEL: @splat_struct( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], ptr [[BASE:%.*]], i64 0, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %gep = getelementptr %struct.a, ptr %base, <4 x i64> zeroinitializer, i32 1 @@ -38,7 +38,7 @@ define <4 x i32> @scalar_index(ptr %base, i64 %index) { ; CHECK-LABEL: @scalar_index( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x ptr> undef, ptr %base, i32 0 @@ -52,7 +52,7 @@ define <4 x i32> @splat_index(ptr %base, i64 %index) { ; CHECK-LABEL: @splat_index( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0 @@ -65,7 +65,7 @@ define <4 x i32> @splat_index(ptr %base, i64 %index) { define <4 x i32> @test_global_array(<4 x i64> %indxs) { ; CHECK-LABEL: @test_global_array( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr @glob_array, <4 x i64> [[INDXS:%.*]] -; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[G]] ; %p = getelementptr inbounds [16 x i32], ptr @glob_array, i64 0, <4 x i64> %indxs @@ -75,7 +75,7 @@ define <4 x i32> @test_global_array(<4 x i64> %indxs) { define <4 x i32> @global_struct_splat() { ; CHECK-LABEL: @global_struct_splat( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> , i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> , i32 4, <4 x i1> splat (i1 true), <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x ptr> undef, ptr @c, i32 0 diff --git a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll index 6d8890d71e2be9..25cd5526f89a36 100644 --- a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll +++ b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll @@ -123,17 +123,11 @@ define i64 @sdiv_minsize(i64 %a) minsize { } define <2 x i64> @sdiv_v2i64(<2 x i64> %a) { -; CV-LABEL: define <2 x i64> @sdiv_v2i64( -; CV-SAME: <2 x i64> [[A:%.*]]) { -; CV-NEXT: [[TMP1:%.*]] = sdiv <2 x i64> [[A]], -; CV-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP1]], -; CV-NEXT: ret <2 x i64> [[TMP2]] -; -; CI-LABEL: define <2 x i64> @sdiv_v2i64( -; CI-SAME: <2 x i64> [[A:%.*]]) { -; CI-NEXT: [[TMP1:%.*]] = sdiv <2 x i64> [[A]], splat (i64 4294967087) -; CI-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP1]], splat (i64 4294967087) -; CI-NEXT: ret <2 x i64> [[TMP2]] +; CHECK-LABEL: define <2 x i64> @sdiv_v2i64( +; CHECK-SAME: <2 x i64> [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i64> [[A]], splat (i64 4294967087) +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[TMP1]], splat (i64 4294967087) +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = sdiv <2 x i64> %a, %2 = add <2 x i64> %1, diff --git a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll index 88df92fbada075..b21e94b2feb3f4 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll @@ -16,7 +16,7 @@ define <2 x i1> @test.vectorgep(<2 x ptr> %vec) { define <2 x i1> @test.vectorgep.ult.true(<2 x ptr> %vec) { ; CHECK-LABEL: @test.vectorgep.ult.true( ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, <2 x ptr> [[VEC:%.*]], i64 1 -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %gep.1 = getelementptr inbounds i32, <2 x ptr> %vec, i64 1 %t.1 = icmp ult <2 x ptr> %vec, %gep.1 diff --git a/llvm/test/Transforms/ConstraintElimination/vector-compares.ll b/llvm/test/Transforms/ConstraintElimination/vector-compares.ll index 3fa0e18ec5cf2c..e87439fbaf3b08 100644 --- a/llvm/test/Transforms/ConstraintElimination/vector-compares.ll +++ b/llvm/test/Transforms/ConstraintElimination/vector-compares.ll @@ -19,7 +19,7 @@ define void @test_vector_iv(i32 %x, i1 %c) { ; CHECK-NEXT: call void @use(i1 [[C_2]]) ; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i32 [[X]], 9 ; CHECK-NEXT: call void @use(i1 [[C_3]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <4 x i8> [[IV]], +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <4 x i8> [[IV]], splat (i8 1) ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i8> [[IV_NEXT]], i8 2 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[E]], 100 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]] diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll index 200793918f0ef2..57c9f8926b524f 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll @@ -1248,9 +1248,9 @@ define i1 @non_const_range_minmax(i8 %a, i8 %b) { define <2 x i1> @non_const_range_minmax_vec(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @non_const_range_minmax_vec( -; CHECK-NEXT: [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) -; CHECK-NEXT: [[B2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[B:%.*]], <2 x i8> ) -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 10)) +; CHECK-NEXT: [[B2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[B:%.*]], <2 x i8> splat (i8 11)) +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %a, <2 x i8> ) %b2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %b, <2 x i8> ) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll index 669527ed88fa2f..5122f98f4c82fd 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll @@ -715,7 +715,7 @@ define { i8, i1 } @signed_mul_constant_folding() { define { <2 x i32>, <2 x i1> } @uaddo_vec(<2 x i32> %a) { ; CHECK-LABEL: @uaddo_vec( -; CHECK-NEXT: [[ADD:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[ADD:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[ADD]] ; %add = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> ) @@ -909,7 +909,7 @@ cont: define <2 x i8> @uadd_sat_vec(<2 x i8> %a) { ; CHECK-LABEL: @uadd_sat_vec( -; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 1)) ; CHECK-NEXT: ret <2 x i8> [[ADD]] ; %add = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll index bda270e99bc2e6..37eb4d9c978ec8 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll @@ -4,8 +4,8 @@ define <2 x i1> @cmp1(<2 x i8> %a) { ; CHECK-LABEL: define <2 x i1> @cmp1( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], splat (i8 1) +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nuw <2 x i8> %a, splat (i8 1) %cmp = icmp ne <2 x i8> %add, zeroinitializer @@ -15,8 +15,8 @@ define <2 x i1> @cmp1(<2 x i8> %a) { define <2 x i1> @cmp2(<2 x i8> %a) { ; CHECK-LABEL: define <2 x i1> @cmp2( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], splat (i8 5) +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nuw <2 x i8> %a, splat (i8 5) %cmp = icmp ugt <2 x i8> %add, splat (i8 2) @@ -27,7 +27,7 @@ define <2 x i1> @cmp_nonsplat(<2 x i8> %a) { ; CHECK-LABEL: define <2 x i1> @cmp_nonsplat( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i8> [[A]], -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nuw <2 x i8> %a, %cmp = icmp ugt <2 x i8> %add, @@ -51,7 +51,7 @@ define <2 x i1> @cmp_signedness(<2 x i8> %a) { ; CHECK-LABEL: define <2 x i1> @cmp_signedness( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i16> [[ZEXT]], splat (i16 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %zext = zext <2 x i8> %a to <2 x i16> @@ -63,7 +63,7 @@ define <2 x i16> @infer_nowrap(<2 x i8> %a) { ; CHECK-LABEL: define range(i16 1, 257) <2 x i16> @infer_nowrap( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], splat (i16 1) ; CHECK-NEXT: ret <2 x i16> [[RES]] ; %zext = zext <2 x i8> %a to <2 x i16> @@ -134,7 +134,7 @@ define <2 x i16> @saturating(<2 x i8> %a) { ; CHECK-LABEL: define range(i16 1, 257) <2 x i16> @saturating( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], splat (i16 1) ; CHECK-NEXT: ret <2 x i16> [[RES]] ; %zext = zext <2 x i8> %a to <2 x i16> @@ -146,7 +146,7 @@ define {<2 x i16>, <2 x i1>} @with_overflow(<2 x i8> %a) { ; CHECK-LABEL: define { <2 x i16>, <2 x i1> } @with_overflow( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[RES1:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[RES1:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], splat (i16 1) ; CHECK-NEXT: [[RES:%.*]] = insertvalue { <2 x i16>, <2 x i1> } { <2 x i16> poison, <2 x i1> zeroinitializer }, <2 x i16> [[RES1]], 0 ; CHECK-NEXT: ret { <2 x i16>, <2 x i1> } [[RES]] ; @@ -160,7 +160,7 @@ define <2 x i16> @srem1(<2 x i8> %a) { ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> ; CHECK-NEXT: [[RES1_LHS_TRUNC:%.*]] = trunc <2 x i16> [[ZEXT]] to <2 x i8> -; CHECK-NEXT: [[RES12:%.*]] = urem <2 x i8> [[RES1_LHS_TRUNC]], +; CHECK-NEXT: [[RES12:%.*]] = urem <2 x i8> [[RES1_LHS_TRUNC]], splat (i8 42) ; CHECK-NEXT: [[RES:%.*]] = zext <2 x i8> [[RES12]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[RES]] ; @@ -174,7 +174,7 @@ define <2 x i16> @srem2(<2 x i8> %a) { ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = sext <2 x i8> [[A]] to <2 x i16> ; CHECK-NEXT: [[RES_LHS_TRUNC:%.*]] = trunc <2 x i16> [[ZEXT]] to <2 x i8> -; CHECK-NEXT: [[RES1:%.*]] = srem <2 x i8> [[RES_LHS_TRUNC]], +; CHECK-NEXT: [[RES1:%.*]] = srem <2 x i8> [[RES_LHS_TRUNC]], splat (i8 42) ; CHECK-NEXT: [[RES:%.*]] = sext <2 x i8> [[RES1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[RES]] ; @@ -187,7 +187,7 @@ define <2 x i16> @ashr(<2 x i8> %a) { ; CHECK-LABEL: define range(i16 0, 128) <2 x i16> @ashr( ; CHECK-SAME: <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> -; CHECK-NEXT: [[RES:%.*]] = lshr <2 x i16> [[ZEXT]], +; CHECK-NEXT: [[RES:%.*]] = lshr <2 x i16> [[ZEXT]], splat (i16 1) ; CHECK-NEXT: ret <2 x i16> [[RES]] ; %zext = zext <2 x i8> %a to <2 x i16> @@ -247,10 +247,10 @@ define <4 x i64> @issue_97674_getConstantOnEdge(i1 %cond) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] ; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[FOLDS:%.*]] = add nuw nsw <4 x i64> zeroinitializer, +; CHECK-NEXT: [[FOLDS:%.*]] = add nuw nsw <4 x i64> zeroinitializer, splat (i64 1) ; CHECK-NEXT: br label %[[IF_END]] ; CHECK: [[IF_END]]: -; CHECK-NEXT: [[R:%.*]] = phi <4 x i64> [ , %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ] +; CHECK-NEXT: [[R:%.*]] = phi <4 x i64> [ splat (i64 1), %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ] ; CHECK-NEXT: ret <4 x i64> [[R]] ; entry: @@ -333,7 +333,7 @@ define <2 x i1> @insertelement_fold1() { ; CHECK-LABEL: define <2 x i1> @insertelement_fold1() { ; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> poison, i32 10, i64 0 ; CHECK-NEXT: [[IE2:%.*]] = insertelement <2 x i32> [[IE1]], i32 20, i64 1 -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %ie1 = insertelement <2 x i32> poison, i32 10, i64 0 %ie2 = insertelement <2 x i32> %ie1, i32 20, i64 1 @@ -346,7 +346,7 @@ define <2 x i1> @insertelement_fold1() { define <2 x i1> @insertelement_fold2() { ; CHECK-LABEL: define <2 x i1> @insertelement_fold2() { ; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> , i32 10, i64 0 -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %ie1 = insertelement <2 x i32> , i32 10, i64 0 %icmp1 = icmp slt <2 x i32> %ie1, diff --git a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll b/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll index 8b257f86caf6c0..9f6c21450d040e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll +++ b/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll @@ -6,9 +6,9 @@ define <4 x i32> @bar(<4 x i32> %arg, ptr %arg1) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: bb: -; CHECK-NEXT: store i32 5, ptr [[ARG1:%.*]] -; CHECK-NEXT: [[TMP:%.*]] = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, ptr null, <4 x i32> [[ARG:%.*]], <4 x i32> , i8 1) -; CHECK-NEXT: store i32 10, ptr [[ARG1]] +; CHECK-NEXT: store i32 5, ptr [[ARG1:%.*]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, ptr null, <4 x i32> [[ARG:%.*]], <4 x i32> splat (i32 -1), i8 1) +; CHECK-NEXT: store i32 10, ptr [[ARG1]], align 4 ; CHECK-NEXT: ret <4 x i32> [[TMP]] ; bb: diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll index f0f52d56a9ff07..7169d9f90c6668 100644 --- a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll @@ -53,8 +53,8 @@ b0: define dllexport i32 @f1(ptr %a, <4 x i8> %v1, <4 x i32> %v2) { ; CHECK-LABEL: @f1( -; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> ) -; CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> ) +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> splat (i1 true)) +; CHECK-NEXT: tail call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> splat (i1 true)) ; CHECK-NEXT: ret i32 0 ; tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %v2, ptr %a, i32 1, <4 x i1> ) @@ -64,8 +64,8 @@ define dllexport i32 @f1(ptr %a, <4 x i8> %v1, <4 x i32> %v2) { define dllexport i32 @f2(ptr %a, <4 x i8> %v1, <4 x i32> %v2, <4 x i1> %mask) { ; CHECK-LABEL: @f2( -; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> [[MASK:%.*]]) -; CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> [[MASK]]) +; CHECK-NEXT: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> [[MASK:%.*]]) +; CHECK-NEXT: tail call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> [[MASK]]) ; CHECK-NEXT: ret i32 0 ; tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %v2, ptr %a, i32 1, <4 x i1> %mask) diff --git a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll index fbab350008f4ee..bdc023128c8520 100644 --- a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll @@ -79,7 +79,7 @@ define void @VectorTestPartiallyOverlapping(ptr %arg, i32 %i) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I2:%.*]] = zext i32 [[I:%.*]] to i64 ; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds float, ptr [[ARG:%.*]], i64 [[I2]] -; CHECK-NEXT: store <2 x float> , ptr [[I3]], align 16 +; CHECK-NEXT: store <2 x float> splat (float 1.000000e+00), ptr [[I3]], align 16 ; CHECK-NEXT: [[I5:%.*]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[I6:%.*]] = zext i32 [[I5]] to i64 ; CHECK-NEXT: [[I7:%.*]] = getelementptr inbounds float, ptr [[ARG]], i64 [[I6]] diff --git a/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll b/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll index d01ded9ebbfda8..ef2df8a3d678a9 100644 --- a/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll +++ b/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll @@ -74,8 +74,8 @@ define i32 @poison_does_not_freeze_signed(ptr %p, i32 noundef %x, i32 noundef %y define <4 x i8> @poison_does_not_freeze_vector(ptr %p, <4 x i8> noundef %x, <4 x i8> noundef %y) { ; CHECK-LABEL: define <4 x i8> @poison_does_not_freeze_vector( ; CHECK-SAME: ptr [[P:%.*]], <4 x i8> noundef [[X:%.*]], <4 x i8> noundef [[Y:%.*]]) { -; CHECK-NEXT: [[X2:%.*]] = shl nuw nsw <4 x i8> [[X]], -; CHECK-NEXT: [[Y2:%.*]] = add nuw nsw <4 x i8> [[Y]], +; CHECK-NEXT: [[X2:%.*]] = shl nuw nsw <4 x i8> [[X]], splat (i8 5) +; CHECK-NEXT: [[Y2:%.*]] = add nuw nsw <4 x i8> [[Y]], splat (i8 1) ; CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i8> [[X2]], [[Y2]] ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[DIV]], [[Y2]] ; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub <4 x i8> [[X2]], [[TMP1]] diff --git a/llvm/test/Transforms/EarlyCSE/commute.ll b/llvm/test/Transforms/EarlyCSE/commute.ll index 1cf7ddda7f0dd7..edafeccd3c8cc4 100644 --- a/llvm/test/Transforms/EarlyCSE/commute.ll +++ b/llvm/test/Transforms/EarlyCSE/commute.ll @@ -483,9 +483,9 @@ define i32 @select_not_cond(i1 %cond, i32 %t, i32 %f) { define <2 x double> @select_not_cond_commute_vec(<2 x i1> %cond, <2 x double> %t, <2 x double> %f) { ; CHECK-LABEL: @select_not_cond_commute_vec( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[COND:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[COND:%.*]], splat (i1 true) ; CHECK-NEXT: [[M1:%.*]] = select <2 x i1> [[COND]], <2 x double> [[T:%.*]], <2 x double> [[F:%.*]] -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %not = xor <2 x i1> %cond, %m1 = select <2 x i1> %cond, <2 x double> %t, <2 x double> %f @@ -833,7 +833,7 @@ define float @maxnum(float %a, float %b) { define <2 x float> @minnum(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: @minnum( ; CHECK-NEXT: [[X:%.*]] = call fast <2 x float> @llvm.minnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %x = call fast <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) %y = call fast <2 x float> @llvm.minnum.v2f32(<2 x float> %b, <2 x float> %a) @@ -844,7 +844,7 @@ define <2 x float> @minnum(<2 x float> %a, <2 x float> %b) { define <2 x double> @maximum(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @maximum( ; CHECK-NEXT: [[X:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %x = call fast <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) %y = call <2 x double> @llvm.maximum.v2f64(<2 x double> %b, <2 x double> %a) @@ -1109,7 +1109,7 @@ define float @fma_different_add_ops(float %a, float %b, float %c, float %d) { define <2 x double> @fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd( ; CHECK-NEXT: [[X:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]]) -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %x = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) %y = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %a, <2 x double> %c) diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll index 499b5ac8de0af9..282cb94d8c5f20 100644 --- a/llvm/test/Transforms/EarlyCSE/gep.ll +++ b/llvm/test/Transforms/EarlyCSE/gep.ll @@ -13,7 +13,7 @@ define void @foo(ptr %a, <4 x i64> %b, i64 %i) { ; CHECK-NEXT: [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7 ; CHECK-NEXT: [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1 ; CHECK-NEXT: [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> +; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> splat (i64 1) ; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V]]) ; CHECK-NEXT: [[V2:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> ; CHECK-NEXT: call void @use_vec(<4 x ptr> [[V2]]) diff --git a/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll b/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll index c0afce55cb3e87..18e9dc41a6b6dd 100644 --- a/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll +++ b/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll @@ -287,7 +287,7 @@ declare void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) nocapture, ptr nocapture, i define ptr addrspace(4) @neg_store_clobber(ptr addrspace(4) %loc) { ; CHECK-LABEL: @neg_store_clobber( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <2 x i64> , ptr addrspace(4) [[LOC:%.*]], align 16 +; CHECK-NEXT: store <2 x i64> splat (i64 4), ptr addrspace(4) [[LOC:%.*]], align 16 ; CHECK-NEXT: [[LOC_OFF:%.*]] = getelementptr ptr addrspace(4), ptr addrspace(4) [[LOC]], i64 1 ; CHECK-NEXT: [[REF:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LOC_OFF]], align 8 ; CHECK-NEXT: ret ptr addrspace(4) [[REF]] @@ -369,7 +369,7 @@ entry: declare void @use.v2(<2 x ptr addrspace(4)>) declare void @use.v4(<4 x ptr addrspace(4)>) - define ptr addrspace(5) @multini(i1 %alwaysFalse, ptr addrspace(4) %val, ptr %loc) { + define ptr addrspace(5) @multini(i1 %alwaysFalse, ptr addrspace(4) %val, ptr %loc) { ; CHECK-LABEL: @multini( ; CHECK-NEXT: entry: ; CHECK-NEXT: store ptr addrspace(4) [[VAL:%.*]], ptr [[LOC:%.*]], align 8 diff --git a/llvm/test/Transforms/GVN/non-integral-pointers.ll b/llvm/test/Transforms/GVN/non-integral-pointers.ll index 5f2aef4f443502..559d63651eb323 100644 --- a/llvm/test/Transforms/GVN/non-integral-pointers.ll +++ b/llvm/test/Transforms/GVN/non-integral-pointers.ll @@ -287,7 +287,7 @@ declare void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) nocapture, ptr nocapture, i define ptr addrspace(4) @neg_store_clobber(ptr addrspace(4) %loc) { ; CHECK-LABEL: @neg_store_clobber( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <2 x i64> , ptr addrspace(4) [[LOC:%.*]], align 16 +; CHECK-NEXT: store <2 x i64> splat (i64 4), ptr addrspace(4) [[LOC:%.*]], align 16 ; CHECK-NEXT: [[LOC_OFF:%.*]] = getelementptr ptr addrspace(4), ptr addrspace(4) [[LOC]], i64 1 ; CHECK-NEXT: [[REF:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LOC_OFF]], align 8 ; CHECK-NEXT: ret ptr addrspace(4) [[REF]] diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll index 2f6640ce985426..8a9c81912cc935 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll @@ -48,7 +48,7 @@ define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) { define amdgpu_kernel void @vector_gep(<4 x ptr addrspace(3)> %array) nounwind { ; CHECK-LABEL: @vector_gep( ; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <4 x ptr addrspace(3)> [[ARRAY:%.*]] to <4 x ptr> -; CHECK-NEXT: [[P:%.*]] = getelementptr [1024 x i32], <4 x ptr> [[CAST]], <4 x i16> zeroinitializer, <4 x i16> +; CHECK-NEXT: [[P:%.*]] = getelementptr [1024 x i32], <4 x ptr> [[CAST]], <4 x i16> zeroinitializer, <4 x i16> splat (i16 16) ; CHECK-NEXT: [[P0:%.*]] = extractelement <4 x ptr> [[P]], i32 0 ; CHECK-NEXT: [[P1:%.*]] = extractelement <4 x ptr> [[P]], i32 1 ; CHECK-NEXT: [[P2:%.*]] = extractelement <4 x ptr> [[P]], i32 2 diff --git a/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll b/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll index 9e051ae63bfc61..e6b27cc1c452b0 100644 --- a/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll +++ b/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll @@ -6,7 +6,7 @@ define <4 x i32> @masked_gather_inferas(ptr addrspace(1) %out, <4 x i64> %index) ; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <4 x i64> [[INDEX:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT]], <4 x i64> [[INDEX]] -; CHECK-NEXT: [[VALUE:%.*]] = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p1(<4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[VALUE:%.*]] = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p1(<4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: ret <4 x i32> [[VALUE]] ; entry: @@ -21,7 +21,7 @@ define void @masked_scatter_inferas(ptr addrspace(1) %out, <4 x i64> %index, <4 ; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <4 x i64> [[INDEX:%.*]], <4 x i32> [[VALUE:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT]], <4 x i64> [[INDEX]] -; CHECK-NEXT: tail call void @llvm.masked.scatter.v4i32.v4p1(<4 x i32> [[VALUE]], <4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> ) +; CHECK-NEXT: tail call void @llvm.masked.scatter.v4i32.v4p1(<4 x i32> [[VALUE]], <4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll index 9e43aff4553044..397cb2af55c2c8 100644 --- a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll +++ b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll @@ -16,8 +16,8 @@ define i1 @test(i32 %tmp6) { define <2 x i1> @test_vec(<2 x i32> %tmp6) { ; CHECK-LABEL: @test_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP6:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP6:%.*]], splat (i32 71) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -12) ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %tmp7 = sdiv <2 x i32> %tmp6, diff --git a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll index fa63847cab2a2c..c723e88ad65dd9 100644 --- a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll +++ b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll @@ -20,8 +20,8 @@ define i16 @test1(i16 %a) { define <2 x i16> @test1_vec(<2 x i16> %a) { ; CHECK-LABEL: @test1_vec( -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i16> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = mul <2 x i16> [[A]], +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 8) +; CHECK-NEXT: [[D:%.*]] = mul <2 x i16> [[A]], splat (i16 5) ; CHECK-NEXT: [[E:%.*]] = or <2 x i16> [[C]], [[D]] ; CHECK-NEXT: ret <2 x i16> [[E]] ; diff --git a/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll b/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll index 0038abe5e7b117..f334e42c85994a 100644 --- a/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll +++ b/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll @@ -17,7 +17,7 @@ entry: define <2 x i32> @a_vec(<2 x i32> %b) nounwind { ; CHECK-LABEL: @a_vec( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP0]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll b/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll index f1f881b8a1bb00..d3cbd943432360 100644 --- a/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll +++ b/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll @@ -4,7 +4,7 @@ define <2 x i8> @foo(<2 x i8> %x) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[A:%.*]] = srem <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = srem <2 x i8> [[X:%.*]], splat (i8 2) ; CHECK-NEXT: ret <2 x i8> [[A]] ; %A = srem <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll index 63f466a5024c82..89533e7de43d45 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll @@ -29,7 +29,7 @@ define <4 x i32> @constantMulARM64() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulARM64( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 6) ; entry: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -40,7 +40,7 @@ define <4 x i32> @constantMulSARM64() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulSARM64( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 -1) ; entry: %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -51,7 +51,7 @@ define <4 x i32> @constantMulUARM64() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulUARM64( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 65535) ; entry: %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -62,7 +62,7 @@ define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @complex1ARM64( ; CHECK-SAME: <4 x i16> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[A:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> , <4 x i16> [[X]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[A:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> splat (i16 2), <4 x i16> [[X]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: ret <4 x i32> [[A]] ; entry: @@ -75,7 +75,7 @@ define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @complex2ARM64( ; CHECK-SAME: <4 x i32> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[X]], +; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[X]], splat (i32 6) ; CHECK-NEXT: ret <4 x i32> [[B]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll index 122f0ce31f6290..c6695f17b955bf 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll @@ -17,7 +17,7 @@ define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: define <16 x i8> @combineXorAeseNonZeroARM64( ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) { ; CHECK-NEXT: [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]] -; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> [[DATA_XOR]], <16 x i8> ) +; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> [[DATA_XOR]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: ret <16 x i8> [[DATA_AES]] ; %data.xor = xor <16 x i8> %data, %key @@ -40,7 +40,7 @@ define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: define <16 x i8> @combineXorAesdNonZeroARM64( ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) { ; CHECK-NEXT: [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]] -; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> [[DATA_XOR]], <16 x i8> ) +; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> [[DATA_XOR]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: ret <16 x i8> [[DATA_AES]] ; %data.xor = xor <16 x i8> %data, %key diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index e4f8bb133e6ce6..779def76fc58d3 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -1155,7 +1155,7 @@ define <2 x half> @constant_cvt_pkrtz() { ; Test constant values where rtz changes result define <2 x half> @constant_rtz_pkrtz() { ; CHECK-LABEL: @constant_rtz_pkrtz( -; CHECK-NEXT: ret <2 x half> +; CHECK-NEXT: ret <2 x half> splat (half 0xH7BFF) ; %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0) ret <2 x half> %cvt diff --git a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll index 5fbbc0ed57750a..5fc5709ff88971 100644 --- a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll @@ -28,7 +28,7 @@ define <4 x i32> @constantMul() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMul( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 6) ; entry: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -39,7 +39,7 @@ define <4 x i32> @constantMulS() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulS( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 -1) ; entry: %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -50,7 +50,7 @@ define <4 x i32> @constantMulU() nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @constantMulU( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 65535) ; entry: %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> , <4 x i16> ) nounwind @@ -61,7 +61,7 @@ define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @complex1( ; CHECK-SAME: <4 x i16> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[A:%.*]] = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> , <4 x i16> [[X]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[A:%.*]] = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> splat (i16 2), <4 x i16> [[X]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: ret <4 x i32> [[A]] ; entry: @@ -74,7 +74,7 @@ define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp { ; CHECK-LABEL: define <4 x i32> @complex2( ; CHECK-SAME: <4 x i32> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[X]], +; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[X]], splat (i32 6) ; CHECK-NEXT: ret <4 x i32> [[B]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll index 5369e9a94c32ed..0056d872ff9e33 100644 --- a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll @@ -17,7 +17,7 @@ define <16 x i8> @combineXorAeseNonZeroARM(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: define <16 x i8> @combineXorAeseNonZeroARM( ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) { ; CHECK-NEXT: [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]] -; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> [[DATA_XOR]], <16 x i8> ) +; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> [[DATA_XOR]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: ret <16 x i8> [[DATA_AES]] ; %data.xor = xor <16 x i8> %data, %key @@ -40,7 +40,7 @@ define <16 x i8> @combineXorAesdNonZeroARM(<16 x i8> %data, <16 x i8> %key) { ; CHECK-LABEL: define <16 x i8> @combineXorAesdNonZeroARM( ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) { ; CHECK-NEXT: [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]] -; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> [[DATA_XOR]], <16 x i8> ) +; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> [[DATA_XOR]], <16 x i8> splat (i8 -1)) ; CHECK-NEXT: ret <16 x i8> [[DATA_AES]] ; %data.xor = xor <16 x i8> %data, %key diff --git a/llvm/test/Transforms/InstCombine/ARM/mve-v2i2v.ll b/llvm/test/Transforms/InstCombine/ARM/mve-v2i2v.ll index d5fc6d34abca9c..ffad0ffb54f3e0 100644 --- a/llvm/test/Transforms/InstCombine/ARM/mve-v2i2v.ll +++ b/llvm/test/Transforms/InstCombine/ARM/mve-v2i2v.ll @@ -282,7 +282,7 @@ entry: define <2 x i1> @vpnot_2(<2 x i1> %vin) { ; CHECK-LABEL: @vpnot_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <2 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <2 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[VOUT]] ; entry: @@ -295,7 +295,7 @@ entry: define <4 x i1> @vpnot_4(<4 x i1> %vin) { ; CHECK-LABEL: @vpnot_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <4 x i1> [[VOUT]] ; entry: @@ -308,7 +308,7 @@ entry: define <8 x i1> @vpnot_8(<8 x i1> %vin) { ; CHECK-LABEL: @vpnot_8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <8 x i1> [[VOUT]] ; entry: @@ -321,7 +321,7 @@ entry: define <16 x i1> @vpnot_16(<16 x i1> %vin) { ; CHECK-LABEL: @vpnot_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <16 x i1> [[VOUT]] ; entry: @@ -337,7 +337,7 @@ entry: define <2 x i1> @vpnot_narrow_2(<2 x i1> %vin) { ; CHECK-LABEL: @vpnot_narrow_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <2 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <2 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[VOUT]] ; entry: @@ -352,7 +352,7 @@ entry: define <4 x i1> @vpnot_narrow_4(<4 x i1> %vin) { ; CHECK-LABEL: @vpnot_narrow_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <4 x i1> [[VOUT]] ; entry: @@ -367,7 +367,7 @@ entry: define <8 x i1> @vpnot_narrow_8(<8 x i1> %vin) { ; CHECK-LABEL: @vpnot_narrow_8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <8 x i1> [[VOUT]] ; entry: @@ -382,7 +382,7 @@ entry: define <16 x i1> @vpnot_narrow_16(<16 x i1> %vin) { ; CHECK-LABEL: @vpnot_narrow_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], +; CHECK-NEXT: [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], splat (i1 true) ; CHECK-NEXT: ret <16 x i1> [[VOUT]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll index 49f4729e746e86..5e2da429948812 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll @@ -158,7 +158,7 @@ define <4 x i64> @mload_v4i64(ptr %f) { define <4 x i64> @mload_v4i64_cmp(ptr %f, <4 x i64> %src) { ; CHECK-LABEL: @mload_v4i64_cmp( -; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt <4 x i64> [[SRC:%.*]], +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt <4 x i64> [[SRC:%.*]], splat (i64 -1) ; CHECK-NEXT: [[LD:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr [[F:%.*]], i32 1, <4 x i1> [[ICMP]], <4 x i64> zeroinitializer) ; CHECK-NEXT: ret <4 x i64> [[LD]] ; @@ -270,7 +270,7 @@ define void @mstore_v4f64(ptr %f, <4 x double> %v) { define void @mstore_v4f64_cmp(ptr %f, <4 x i32> %src, <4 x double> %v) { ; CHECK-LABEL: @mstore_v4f64_cmp( -; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt <4 x i32> [[SRC:%.*]], +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt <4 x i32> [[SRC:%.*]], splat (i32 -1) ; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[V:%.*]], ptr [[F:%.*]], i32 1, <4 x i1> [[ICMP]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll index 04bba79aada0ab..76e2f9af2ed1de 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll @@ -261,7 +261,7 @@ define i32 @zero_x86_avx2_pmovmskb() { define i32 @fold_x86_mmx_pmovmskb() { ; CHECK-LABEL: @fold_x86_mmx_pmovmskb( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> splat (i64 18084223940296448)) ; CHECK-NEXT: ret i32 [[TMP1]] ; %1 = bitcast <8 x i8> to <1 x i64> diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll index 80d3c42341f820..394754b7b54428 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll @@ -182,8 +182,8 @@ define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 4294967295) ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret <4 x i64> [[TMP7]] ; @@ -199,8 +199,8 @@ define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295) ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <8 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret <8 x i64> [[TMP7]] ; @@ -216,10 +216,10 @@ define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <2 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: ret <2 x i64> [[TMP9]] ; @@ -235,10 +235,10 @@ define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i64> [[TMP10]] @@ -256,10 +256,10 @@ define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <8 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i64> [[TMP10]] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll index 5504fa15dc3026..0b0b2349bb061d 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll @@ -182,8 +182,8 @@ define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 4294967295) ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret <4 x i64> [[TMP7]] ; @@ -199,8 +199,8 @@ define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295) ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <8 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret <8 x i64> [[TMP7]] ; @@ -216,10 +216,10 @@ define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <2 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: ret <2 x i64> [[TMP9]] ; @@ -235,10 +235,10 @@ define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i64> [[TMP10]] @@ -256,10 +256,10 @@ define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], splat (i64 32) ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <8 x i64> [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i64> [[TMP10]] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll index 74d58bae742746..43e5f0cbefc9be 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll @@ -359,8 +359,8 @@ define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -372,8 +372,8 @@ define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -385,8 +385,8 @@ define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -398,8 +398,8 @@ define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_128( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -411,8 +411,8 @@ define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], splat (i32 23) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -424,8 +424,8 @@ define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -437,8 +437,8 @@ define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) ; CHECK-NEXT: ret <32 x i8> [[TMP3]] ; @@ -450,8 +450,8 @@ define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_256( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) ; CHECK-NEXT: ret <32 x i8> [[TMP3]] ; @@ -463,8 +463,8 @@ define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], splat (i32 23) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -476,8 +476,8 @@ define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -489,8 +489,8 @@ define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]]) ; CHECK-NEXT: ret <64 x i8> [[TMP3]] ; @@ -502,8 +502,8 @@ define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_512( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]]) ; CHECK-NEXT: ret <64 x i8> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll index 6ddcb856692b0f..15439ebfa2151d 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll @@ -359,8 +359,8 @@ define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -372,8 +372,8 @@ define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; @@ -385,8 +385,8 @@ define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -398,8 +398,8 @@ define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_128( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) ; CHECK-NEXT: ret <16 x i8> [[TMP3]] ; @@ -411,8 +411,8 @@ define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], splat (i32 23) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -424,8 +424,8 @@ define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; @@ -437,8 +437,8 @@ define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) ; CHECK-NEXT: ret <32 x i8> [[TMP3]] ; @@ -450,8 +450,8 @@ define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_256( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) ; CHECK-NEXT: ret <32 x i8> [[TMP3]] ; @@ -463,8 +463,8 @@ define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @trunc_packssdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], splat (i32 23) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -476,8 +476,8 @@ define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @trunc_packusdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], splat (i32 17) +; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], splat (i32 15) ; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; @@ -489,8 +489,8 @@ define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { ; CHECK-LABEL: @trunc_packsswb_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]]) ; CHECK-NEXT: ret <64 x i8> [[TMP3]] ; @@ -502,8 +502,8 @@ define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { ; CHECK-LABEL: @trunc_packuswb_512( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], splat (i16 15) +; CHECK-NEXT: [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1) ; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]]) ; CHECK-NEXT: ret <64 x i8> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll index 947c7d38d26ee0..4d4d619ea7cb41 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll @@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) { define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> ) @@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128_commute( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> , <8 x i16> %a0) @@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> ) @@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256_commute( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> , <16 x i16> %a0) @@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { define <32 x i16> @one_pmulh_512(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %a0, <32 x i16> ) @@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) { define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512_commute( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> , <32 x i16> %a0) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll index 40ad11a6993915..3b14185edde71c 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll @@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) { define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> splat (i16 1)) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> ) @@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) { define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_128_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> , <8 x i16> [[A0:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> splat (i16 1), <8 x i16> [[A0:%.*]]) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> , <8 x i16> %a0) @@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) { define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> splat (i16 1)) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> ) @@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) { define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_256_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> , <16 x i16> [[A0:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> splat (i16 1), <16 x i16> [[A0:%.*]]) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> , <16 x i16> %a0) @@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) { define <32 x i16> @one_pmulh_512(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512( -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> splat (i16 1)) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %a0, <32 x i16> ) @@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) { define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) { ; CHECK-LABEL: @one_pmulh_512_commute( -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> , <32 x i16> [[A0:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> splat (i16 1), <32 x i16> [[A0:%.*]]) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> , <32 x i16> %a0) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll b/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll index 951ed6d3d108ab..2376d18748ee13 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll @@ -144,7 +144,7 @@ define <8 x i32> @vpternlog_d_v256_imm14(<8 x i32> %v0, <8 x i32> %v1, <8 x i32> define <8 x i64> @vpternlog_q_v512_imm15(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2) { ; CHECK-LABEL: @vpternlog_q_v512_imm15( -; CHECK-NEXT: [[R:%.*]] = xor <8 x i64> [[V0:%.*]], +; CHECK-NEXT: [[R:%.*]] = xor <8 x i64> [[V0:%.*]], splat (i64 -1) ; CHECK-NEXT: ret <8 x i64> [[R]] ; %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2, i32 15) @@ -468,7 +468,7 @@ define <8 x i32> @vpternlog_d_v256_imm50(<8 x i32> %v0, <8 x i32> %v1, <8 x i32> define <8 x i64> @vpternlog_q_v512_imm51(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2) { ; CHECK-LABEL: @vpternlog_q_v512_imm51( -; CHECK-NEXT: [[R:%.*]] = xor <8 x i64> [[V1:%.*]], +; CHECK-NEXT: [[R:%.*]] = xor <8 x i64> [[V1:%.*]], splat (i64 -1) ; CHECK-NEXT: ret <8 x i64> [[R]] ; %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2, i32 51) @@ -774,7 +774,7 @@ define <16 x i32> @vpternlog_d_v512_imm84(<16 x i32> %v0, <16 x i32> %v1, <16 x define <2 x i64> @vpternlog_q_v128_imm85(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2) { ; CHECK-LABEL: @vpternlog_q_v128_imm85( -; CHECK-NEXT: [[R:%.*]] = xor <2 x i64> [[V2:%.*]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i64> [[V2:%.*]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i64> [[R]] ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, i32 85) @@ -2301,7 +2301,7 @@ define <8 x i32> @vpternlog_d_v256_imm254(<8 x i32> %v0, <8 x i32> %v1, <8 x i32 define <8 x i64> @vpternlog_q_v512_imm255(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2) { ; CHECK-LABEL: @vpternlog_q_v512_imm255( -; CHECK-NEXT: ret <8 x i64> +; CHECK-NEXT: ret <8 x i64> splat (i64 -1) ; %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2, i32 255) ret <8 x i64> %r @@ -2797,7 +2797,7 @@ define <16 x i32> @vpternlog_d_constv512_imm60() { define <2 x i64> @vpternlog_q_constv128_imm61() { ; CHECK-LABEL: @vpternlog_q_constv128_imm61( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -5) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 61) ret <2 x i64> %r @@ -3037,7 +3037,7 @@ define <16 x i32> @vpternlog_d_constv512_imm90() { define <2 x i64> @vpternlog_q_constv128_imm91() { ; CHECK-LABEL: @vpternlog_q_constv128_imm91( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 91) ret <2 x i64> %r @@ -3085,7 +3085,7 @@ define <16 x i32> @vpternlog_d_constv512_imm96() { define <2 x i64> @vpternlog_q_constv128_imm97() { ; CHECK-LABEL: @vpternlog_q_constv128_imm97( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -14) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 97) ret <2 x i64> %r @@ -3133,7 +3133,7 @@ define <16 x i32> @vpternlog_d_constv512_imm102() { define <2 x i64> @vpternlog_q_constv128_imm103() { ; CHECK-LABEL: @vpternlog_q_constv128_imm103( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 103) ret <2 x i64> %r @@ -3325,7 +3325,7 @@ define <16 x i32> @vpternlog_d_constv512_imm126() { define <2 x i64> @vpternlog_q_constv128_imm127() { ; CHECK-LABEL: @vpternlog_q_constv128_imm127( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 127) ret <2 x i64> %r @@ -3373,7 +3373,7 @@ define <16 x i32> @vpternlog_d_constv512_imm132() { define <2 x i64> @vpternlog_q_constv128_imm133() { ; CHECK-LABEL: @vpternlog_q_constv128_imm133( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -14) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 133) ret <2 x i64> %r @@ -3565,7 +3565,7 @@ define <16 x i32> @vpternlog_d_constv512_imm156() { define <2 x i64> @vpternlog_q_constv128_imm157() { ; CHECK-LABEL: @vpternlog_q_constv128_imm157( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 157) ret <2 x i64> %r @@ -3837,7 +3837,7 @@ define <4 x i32> @vpternlog_d_constv128_imm190() { define <4 x i64> @vpternlog_q_constv256_imm191() { ; CHECK-LABEL: @vpternlog_q_constv256_imm191( -; CHECK-NEXT: ret <4 x i64> +; CHECK-NEXT: ret <4 x i64> splat (i64 -1) ; %r = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> , <4 x i64> , <4 x i64> , i32 191) ret <4 x i64> %r @@ -4093,7 +4093,7 @@ define <16 x i32> @vpternlog_d_constv512_imm222() { define <2 x i64> @vpternlog_q_constv128_imm223() { ; CHECK-LABEL: @vpternlog_q_constv128_imm223( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 223) ret <2 x i64> %r @@ -4285,7 +4285,7 @@ define <16 x i32> @vpternlog_d_constv512_imm246() { define <2 x i64> @vpternlog_q_constv128_imm247() { ; CHECK-LABEL: @vpternlog_q_constv128_imm247( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> , <2 x i64> , <2 x i64> , i32 247) ret <2 x i64> %r @@ -4349,7 +4349,7 @@ define <8 x i32> @vpternlog_d_constv256_imm254() { define <8 x i64> @vpternlog_q_constv512_imm255() { ; CHECK-LABEL: @vpternlog_q_constv512_imm255( -; CHECK-NEXT: ret <8 x i64> +; CHECK-NEXT: ret <8 x i64> splat (i64 -1) ; %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> , <8 x i64> , <8 x i64> , i32 255) ret <8 x i64> %r diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll index 5b6ce56b4b3265..5a74511139c117 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll @@ -16,7 +16,7 @@ define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrai_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15) @@ -25,7 +25,7 @@ define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrai_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64) @@ -42,7 +42,7 @@ define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrai_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15) @@ -51,7 +51,7 @@ define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrai_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64) @@ -68,7 +68,7 @@ define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrai_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15) @@ -77,7 +77,7 @@ define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrai_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64) @@ -94,7 +94,7 @@ define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrai_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15) @@ -103,7 +103,7 @@ define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrai_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64) @@ -120,7 +120,7 @@ define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) { define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_128_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15) @@ -129,7 +129,7 @@ define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) { define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_128_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64) @@ -146,7 +146,7 @@ define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) { define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_256_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15) @@ -155,7 +155,7 @@ define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) { define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_256_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64) @@ -172,7 +172,7 @@ define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrai_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15) @@ -181,7 +181,7 @@ define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) { define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrai_w_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64) @@ -198,7 +198,7 @@ define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrai_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15) @@ -207,7 +207,7 @@ define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) { define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrai_d_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64) @@ -224,7 +224,7 @@ define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15) @@ -233,7 +233,7 @@ define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) { define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64) @@ -254,7 +254,7 @@ define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15) @@ -279,7 +279,7 @@ define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15) @@ -304,7 +304,7 @@ define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psrli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15) @@ -329,7 +329,7 @@ define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15) @@ -354,7 +354,7 @@ define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15) @@ -379,7 +379,7 @@ define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15) @@ -404,7 +404,7 @@ define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrli_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15) @@ -429,7 +429,7 @@ define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrli_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15) @@ -454,7 +454,7 @@ define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrli_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15) @@ -483,7 +483,7 @@ define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) { define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_pslli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15) @@ -508,7 +508,7 @@ define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) { define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_pslli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15) @@ -533,7 +533,7 @@ define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) { define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_pslli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15) @@ -558,7 +558,7 @@ define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) { define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_pslli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15) @@ -583,7 +583,7 @@ define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) { define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_pslli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15) @@ -608,7 +608,7 @@ define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) { define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_pslli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15) @@ -633,7 +633,7 @@ define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_pslli_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15) @@ -658,7 +658,7 @@ define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_pslli_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15) @@ -683,7 +683,7 @@ define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_pslli_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15) @@ -712,7 +712,7 @@ define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -721,7 +721,7 @@ define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -730,7 +730,7 @@ define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -747,7 +747,7 @@ define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -756,7 +756,7 @@ define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -765,7 +765,7 @@ define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -782,7 +782,7 @@ define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -791,7 +791,7 @@ define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -800,7 +800,7 @@ define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -817,7 +817,7 @@ define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -826,7 +826,7 @@ define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -835,7 +835,7 @@ define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -852,7 +852,7 @@ define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) { define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_128_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> ) @@ -861,7 +861,7 @@ define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) { define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_128_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> ) @@ -878,7 +878,7 @@ define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) { define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_256_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> ) @@ -887,7 +887,7 @@ define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) { define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_256_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> ) @@ -904,7 +904,7 @@ define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -913,7 +913,7 @@ define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -922,7 +922,7 @@ define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -939,7 +939,7 @@ define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -948,7 +948,7 @@ define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -957,7 +957,7 @@ define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -974,7 +974,7 @@ define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> ) @@ -983,7 +983,7 @@ define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) { define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> ) @@ -1004,7 +1004,7 @@ define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrl_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) @@ -1037,7 +1037,7 @@ define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrl_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) @@ -1070,7 +1070,7 @@ define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psrl_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) @@ -1095,7 +1095,7 @@ define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrl_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) @@ -1128,7 +1128,7 @@ define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrl_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) @@ -1161,7 +1161,7 @@ define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrl_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) @@ -1186,7 +1186,7 @@ define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrl_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> ) @@ -1219,7 +1219,7 @@ define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrl_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> ) @@ -1252,7 +1252,7 @@ define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrl_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> ) @@ -1281,7 +1281,7 @@ define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psll_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> ) @@ -1314,7 +1314,7 @@ define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psll_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) @@ -1347,7 +1347,7 @@ define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psll_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> ) @@ -1372,7 +1372,7 @@ define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psll_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) @@ -1405,7 +1405,7 @@ define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psll_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) @@ -1438,7 +1438,7 @@ define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psll_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> ) @@ -1463,7 +1463,7 @@ define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psll_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> ) @@ -1496,7 +1496,7 @@ define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psll_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> ) @@ -1529,7 +1529,7 @@ define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psll_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> ) @@ -2911,7 +2911,7 @@ define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) { define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psrav_d_128_masked( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll index b1e5fa4f9e1c94..d38e22a4e3851b 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll @@ -16,7 +16,7 @@ define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrai_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15) @@ -25,7 +25,7 @@ define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrai_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64) @@ -42,7 +42,7 @@ define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrai_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15) @@ -51,7 +51,7 @@ define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrai_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64) @@ -68,7 +68,7 @@ define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrai_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15) @@ -77,7 +77,7 @@ define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrai_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64) @@ -94,7 +94,7 @@ define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrai_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15) @@ -103,7 +103,7 @@ define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrai_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64) @@ -120,7 +120,7 @@ define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) { define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_128_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15) @@ -129,7 +129,7 @@ define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) { define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_128_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64) @@ -146,7 +146,7 @@ define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) { define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_256_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15) @@ -155,7 +155,7 @@ define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) { define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_256_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64) @@ -172,7 +172,7 @@ define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrai_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15) @@ -181,7 +181,7 @@ define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) { define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrai_w_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64) @@ -198,7 +198,7 @@ define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrai_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15) @@ -207,7 +207,7 @@ define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) { define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrai_d_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64) @@ -224,7 +224,7 @@ define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15) @@ -233,7 +233,7 @@ define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) { define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrai_q_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64) @@ -254,7 +254,7 @@ define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15) @@ -279,7 +279,7 @@ define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15) @@ -304,7 +304,7 @@ define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psrli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15) @@ -329,7 +329,7 @@ define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15) @@ -354,7 +354,7 @@ define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15) @@ -379,7 +379,7 @@ define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15) @@ -404,7 +404,7 @@ define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrli_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15) @@ -429,7 +429,7 @@ define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrli_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15) @@ -454,7 +454,7 @@ define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrli_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15) @@ -483,7 +483,7 @@ define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) { define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_pslli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15) @@ -508,7 +508,7 @@ define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) { define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_pslli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15) @@ -533,7 +533,7 @@ define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) { define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_pslli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15) @@ -558,7 +558,7 @@ define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) { define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_pslli_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15) @@ -583,7 +583,7 @@ define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) { define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_pslli_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15) @@ -608,7 +608,7 @@ define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) { define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_pslli_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15) @@ -633,7 +633,7 @@ define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_pslli_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15) @@ -658,7 +658,7 @@ define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_pslli_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15) @@ -683,7 +683,7 @@ define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_pslli_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15) @@ -712,7 +712,7 @@ define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -721,7 +721,7 @@ define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -730,7 +730,7 @@ define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psra_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> ) @@ -747,7 +747,7 @@ define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -756,7 +756,7 @@ define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -765,7 +765,7 @@ define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psra_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> ) @@ -782,7 +782,7 @@ define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -791,7 +791,7 @@ define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -800,7 +800,7 @@ define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psra_w_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> ) @@ -817,7 +817,7 @@ define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -826,7 +826,7 @@ define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -835,7 +835,7 @@ define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psra_d_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> ) @@ -852,7 +852,7 @@ define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) { define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_128_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> ) @@ -861,7 +861,7 @@ define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) { define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_128_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> ) @@ -878,7 +878,7 @@ define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) { define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_256_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> ) @@ -887,7 +887,7 @@ define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) { define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_256_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> ) @@ -904,7 +904,7 @@ define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -913,7 +913,7 @@ define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -922,7 +922,7 @@ define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) { define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psra_w_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> ) @@ -939,7 +939,7 @@ define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -948,7 +948,7 @@ define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_15_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -957,7 +957,7 @@ define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) { define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psra_d_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> ) @@ -974,7 +974,7 @@ define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> ) @@ -983,7 +983,7 @@ define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) { define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psra_q_512_64( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> ) @@ -1004,7 +1004,7 @@ define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psrl_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) @@ -1037,7 +1037,7 @@ define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psrl_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) @@ -1070,7 +1070,7 @@ define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psrl_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) @@ -1095,7 +1095,7 @@ define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psrl_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) @@ -1128,7 +1128,7 @@ define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psrl_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) @@ -1161,7 +1161,7 @@ define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psrl_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) @@ -1186,7 +1186,7 @@ define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psrl_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> ) @@ -1219,7 +1219,7 @@ define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psrl_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> ) @@ -1252,7 +1252,7 @@ define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psrl_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> ) @@ -1281,7 +1281,7 @@ define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) { define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) { ; CHECK-LABEL: @sse2_psll_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> ) @@ -1314,7 +1314,7 @@ define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) { define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) { ; CHECK-LABEL: @sse2_psll_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> ) @@ -1347,7 +1347,7 @@ define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) { define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) { ; CHECK-LABEL: @sse2_psll_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> ) @@ -1372,7 +1372,7 @@ define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) { define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) { ; CHECK-LABEL: @avx2_psll_w_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> ) @@ -1405,7 +1405,7 @@ define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) { define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) { ; CHECK-LABEL: @avx2_psll_d_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> ) @@ -1438,7 +1438,7 @@ define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) { define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) { ; CHECK-LABEL: @avx2_psll_q_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> ) @@ -1463,7 +1463,7 @@ define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) { define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) { ; CHECK-LABEL: @avx512_psll_w_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> ) @@ -1496,7 +1496,7 @@ define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) { define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) { ; CHECK-LABEL: @avx512_psll_d_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> ) @@ -1529,7 +1529,7 @@ define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) { define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) { ; CHECK-LABEL: @avx512_psll_q_512_15( -; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> ) @@ -2951,7 +2951,7 @@ define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) { define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psrav_d_128_masked( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll index ec010b02c86a02..b95f00a1e9df03 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll @@ -199,7 +199,7 @@ define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: @cmp_strue_v16i8( -; CHECK-NEXT: ret <16 x i8> +; CHECK-NEXT: ret <16 x i8> splat (i8 -1) ; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 @@ -207,7 +207,7 @@ define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: @cmp_utrue_v16i8( -; CHECK-NEXT: ret <16 x i8> +; CHECK-NEXT: ret <16 x i8> splat (i8 -1) ; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 diff --git a/llvm/test/Transforms/InstCombine/X86/x86-xop.ll b/llvm/test/Transforms/InstCombine/X86/x86-xop.ll index 644cbc8824f3b3..81db68145638c1 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-xop.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-xop.ll @@ -199,7 +199,7 @@ define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) { define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: @cmp_strue_v16i8( -; CHECK-NEXT: ret <16 x i8> +; CHECK-NEXT: ret <16 x i8> splat (i8 -1) ; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 @@ -207,7 +207,7 @@ define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: @cmp_utrue_v16i8( -; CHECK-NEXT: ret <16 x i8> +; CHECK-NEXT: ret <16 x i8> splat (i8 -1) ; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll index 63287e59f66346..7037647d116ba2 100644 --- a/llvm/test/Transforms/InstCombine/abs-1.ll +++ b/llvm/test/Transforms/InstCombine/abs-1.ll @@ -385,7 +385,7 @@ define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) { define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) { ; CHECK-LABEL: @shifty_abs_commute2( -; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[ABS:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[Y]], i1 false) ; CHECK-NEXT: ret <2 x i8> [[ABS]] ; diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index f7c639f1d8e6ba..022d60d2f501bf 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -178,9 +178,9 @@ define i32 @abs_trailing_zeros_negative(i32 %x) { define <4 x i32> @abs_trailing_zeros_negative_vec(<4 x i32> %x) { ; CHECK-LABEL: @abs_trailing_zeros_negative_vec( -; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 -2) ; CHECK-NEXT: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[AND]], i1 false) -; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[ABS]], +; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[ABS]], splat (i32 -4) ; CHECK-NEXT: ret <4 x i32> [[AND2]] ; %and = and <4 x i32> %x, @@ -207,7 +207,7 @@ define i32 @abs_signbits(i30 %x) { define <4 x i32> @abs_signbits_vec(<4 x i30> %x) { ; CHECK-LABEL: @abs_signbits_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i30> @llvm.abs.v4i30(<4 x i30> [[X:%.*]], i1 false) -; CHECK-NEXT: [[NARROW:%.*]] = add nuw <4 x i30> [[TMP1]], +; CHECK-NEXT: [[NARROW:%.*]] = add nuw <4 x i30> [[TMP1]], splat (i30 1) ; CHECK-NEXT: [[ADD:%.*]] = zext <4 x i30> [[NARROW]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[ADD]] ; @@ -317,7 +317,7 @@ define i32 @zext_abs(i31 %x) { define <3 x i82> @lshr_abs(<3 x i82> %x) { ; CHECK-LABEL: @lshr_abs( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], splat (i82 1) ; CHECK-NEXT: ret <3 x i82> [[LSHR]] ; %lshr = lshr <3 x i82> %x, @@ -497,7 +497,7 @@ define i32 @demand_low_bit(i32 %x) { define <3 x i82> @demand_low_bit_int_min_is_poison(<3 x i82> %x) { ; CHECK-LABEL: @demand_low_bit_int_min_is_poison( -; CHECK-NEXT: [[R:%.*]] = shl <3 x i82> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl <3 x i82> [[X:%.*]], splat (i82 81) ; CHECK-NEXT: ret <3 x i82> [[R]] ; %a = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %x, i1 true) @@ -530,9 +530,9 @@ define i32 @srem_by_2_int_min_is_poison(i32 %x) { define <3 x i82> @srem_by_2(<3 x i82> %x, ptr %p) { ; CHECK-LABEL: @srem_by_2( -; CHECK-NEXT: [[S:%.*]] = srem <3 x i82> [[X:%.*]], +; CHECK-NEXT: [[S:%.*]] = srem <3 x i82> [[X:%.*]], splat (i82 2) ; CHECK-NEXT: store <3 x i82> [[S]], ptr [[P:%.*]], align 32 -; CHECK-NEXT: [[R:%.*]] = and <3 x i82> [[X]], +; CHECK-NEXT: [[R:%.*]] = and <3 x i82> [[X]], splat (i82 1) ; CHECK-NEXT: ret <3 x i82> [[R]] ; %s = srem <3 x i82> %x, diff --git a/llvm/test/Transforms/InstCombine/add-mask-neg.ll b/llvm/test/Transforms/InstCombine/add-mask-neg.ll index b72f051a0b799b..cb23763249d636 100644 --- a/llvm/test/Transforms/InstCombine/add-mask-neg.ll +++ b/llvm/test/Transforms/InstCombine/add-mask-neg.ll @@ -78,8 +78,8 @@ define i32 @dec_mask_multiuse_neg_i32(i32 %X) { define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) { ; CHECK-LABEL: @dec_mask_neg_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[DEC]] ; @@ -91,8 +91,8 @@ define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) { define <2 x i32> @dec_mask_neg_v2i32_poison(<2 x i32> %X) { ; CHECK-LABEL: @dec_mask_neg_v2i32_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[DEC]] ; @@ -106,7 +106,7 @@ define <2 x i32> @dec_mask_multiuse_neg_multiuse_v2i32(<2 x i32> %X) { ; CHECK-LABEL: @dec_mask_multiuse_neg_multiuse_v2i32( ; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[X]], [[NEG]] -; CHECK-NEXT: [[DEC:%.*]] = add <2 x i32> [[MASK]], +; CHECK-NEXT: [[DEC:%.*]] = add <2 x i32> [[MASK]], splat (i32 -1) ; CHECK-NEXT: call void @usev(<2 x i32> [[NEG]]) ; CHECK-NEXT: call void @usev(<2 x i32> [[MASK]]) ; CHECK-NEXT: ret <2 x i32> [[DEC]] diff --git a/llvm/test/Transforms/InstCombine/add-mask.ll b/llvm/test/Transforms/InstCombine/add-mask.ll index 00c14e351642e5..6cbb3442d34e09 100644 --- a/llvm/test/Transforms/InstCombine/add-mask.ll +++ b/llvm/test/Transforms/InstCombine/add-mask.ll @@ -32,7 +32,7 @@ define i32 @add_mask_sign_commute_i32(i32 %x) { define <2 x i32> @add_mask_sign_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @add_mask_sign_v2i32( ; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> splat (i32 7), <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = ashr <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll b/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll index 82224496d90ec7..84462f9a7f592b 100644 --- a/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll +++ b/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll @@ -38,7 +38,7 @@ define i32 @add-shl-sdiv-scalar2(i32 %x) { define <3 x i8> @add-shl-sdiv-splat0(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-splat0( -; CHECK-NEXT: [[RZ:%.*]] = srem <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RZ:%.*]] = srem <3 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <3 x i8> [[RZ]] ; %sd = sdiv <3 x i8> %x, @@ -49,7 +49,7 @@ define <3 x i8> @add-shl-sdiv-splat0(<3 x i8> %x) { define <4 x i32> @add-shl-sdiv-splat1(<4 x i32> %x) { ; CHECK-LABEL: @add-shl-sdiv-splat1( -; CHECK-NEXT: [[RZ:%.*]] = srem <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[RZ:%.*]] = srem <4 x i32> [[X:%.*]], splat (i32 1073741824) ; CHECK-NEXT: ret <4 x i32> [[RZ]] ; %sd = sdiv <4 x i32> %x, @@ -60,7 +60,7 @@ define <4 x i32> @add-shl-sdiv-splat1(<4 x i32> %x) { define <2 x i64> @add-shl-sdiv-splat2(<2 x i64> %x) { ; CHECK-LABEL: @add-shl-sdiv-splat2( -; CHECK-NEXT: [[RZ:%.*]] = srem <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[RZ:%.*]] = srem <2 x i64> [[X:%.*]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[RZ]] ; %sd = sdiv <2 x i64> %x, @@ -134,9 +134,9 @@ define i32 @add-shl-sdiv-i32-use3(i32 %x) { declare void @use3xi8(<3 x i8>) define <3 x i8> @add-shl-sdiv-use4(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-use4( -; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -4) ; CHECK-NEXT: call void @use3xi8(<3 x i8> [[SD]]) -; CHECK-NEXT: [[RZ:%.*]] = srem <3 x i8> [[X]], +; CHECK-NEXT: [[RZ:%.*]] = srem <3 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <3 x i8> [[RZ]] ; %sd = sdiv <3 x i8> %x, @@ -187,8 +187,8 @@ define i32 @add-shl-sdiv-negative2(i32 %x) { define <3 x i8> @add-shl-sdiv-negative3(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-negative3( -; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[SL:%.*]] = shl <3 x i8> [[SD]], +; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -5) +; CHECK-NEXT: [[SL:%.*]] = shl <3 x i8> [[SD]], splat (i8 2) ; CHECK-NEXT: [[RZ:%.*]] = add <3 x i8> [[SL]], [[X]] ; CHECK-NEXT: ret <3 x i8> [[RZ]] ; @@ -222,7 +222,7 @@ define <3 x i8> @add-shl-sdiv-3xi8-undef0(<3 x i8> %x) { define <3 x i8> @add-shl-sdiv-3xi8-undef1(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-3xi8-undef1( -; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -4) ; CHECK-NEXT: [[SL:%.*]] = shl <3 x i8> [[SD]], ; CHECK-NEXT: [[RZ:%.*]] = add <3 x i8> [[SL]], [[X]] ; CHECK-NEXT: ret <3 x i8> [[RZ]] @@ -250,7 +250,7 @@ define <2 x i64> @add-shl-sdiv-nonsplat0(<2 x i64> %x) { define <3 x i8> @add-shl-sdiv-nonsplat1(<3 x i8> %x) { ; CHECK-LABEL: @add-shl-sdiv-nonsplat1( -; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -4) ; CHECK-NEXT: [[SL:%.*]] = shl <3 x i8> [[SD]], ; CHECK-NEXT: [[RZ:%.*]] = add <3 x i8> [[SL]], [[X]] ; CHECK-NEXT: ret <3 x i8> [[RZ]] diff --git a/llvm/test/Transforms/InstCombine/add-sitofp.ll b/llvm/test/Transforms/InstCombine/add-sitofp.ll index f1afcaf5f85d2a..fae1365dfa8533 100644 --- a/llvm/test/Transforms/InstCombine/add-sitofp.ll +++ b/llvm/test/Transforms/InstCombine/add-sitofp.ll @@ -101,8 +101,8 @@ define float @test_3(i32 %a, i32 %b) { define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @test_4( -; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], +; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 1073741823) +; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], splat (i32 1073741823) ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]] ; CHECK-NEXT: [[RES:%.*]] = uitofp nneg <4 x i32> [[TMP1]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[RES]] @@ -120,8 +120,8 @@ define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) { define <4 x float> @test_4_neg(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @test_4_neg( -; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], +; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 1073741823) +; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], splat (i32 1073741823) ; CHECK-NEXT: [[A_AND_FP:%.*]] = uitofp nneg <4 x i32> [[A_AND]] to <4 x float> ; CHECK-NEXT: [[B_AND_FP:%.*]] = uitofp nneg <4 x i32> [[B_AND]] to <4 x float> ; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A_AND_FP]], [[B_AND_FP]] diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index 417c3a950d7805..4b1159cf07e710 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -17,7 +17,7 @@ define i32 @select_0_or_1_from_bool(i1 %x) { define <2 x i32> @select_0_or_1_from_bool_vec(<2 x i1> %x) { ; CHECK-LABEL: @select_0_or_1_from_bool_vec( -; CHECK-NEXT: [[NOT_X:%.*]] = xor <2 x i1> [[X:%.*]], +; CHECK-NEXT: [[NOT_X:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i1> [[NOT_X]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[ADD]] ; @@ -62,8 +62,8 @@ define i32 @flip_and_mask(i32 %x) { define <2 x i8> @flip_and_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @flip_and_mask_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[INC:%.*]] = xor <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[INC:%.*]] = xor <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[INC]] ; %shl = shl <2 x i8> %x, @@ -291,7 +291,7 @@ define i1 @test11(i8 %A) { define <2 x i1> @test11vec(<2 x i8> %a) { ; CHECK-LABEL: @test11vec( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add <2 x i8> %a, @@ -312,7 +312,7 @@ define i8 @reassoc_shl1(i8 %x, i8 %y) { define <2 x i8> @reassoc_shl1_commute1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @reassoc_shl1_commute1( -; CHECK-NEXT: [[REASS_ADD:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[REASS_ADD:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[Y:%.*]], [[REASS_ADD]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -521,7 +521,7 @@ define i32 @test19(i1 %C) { define <2 x i32> @test19vec(i1 %C) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1123), <2 x i32> splat (i32 133) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -554,7 +554,7 @@ define i32 @xor_sign_bit(i32 %x) { define <2 x i32> @xor_sign_bit_vec_splat(<2 x i32> %x) { ; CHECK-LABEL: @xor_sign_bit_vec_splat( -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -2147483606) ; CHECK-NEXT: ret <2 x i32> [[ADD]] ; %xor = xor <2 x i32> %x, @@ -684,7 +684,7 @@ define i1 @test21(i32 %x) { define <2 x i1> @test21vec(<2 x i32> %x) { ; CHECK-LABEL: @test21vec( -; CHECK-NEXT: [[Y:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 119) ; CHECK-NEXT: ret <2 x i1> [[Y]] ; %t = add <2 x i32> %x, @@ -882,8 +882,8 @@ define i8 @masked_add(i8 %x) { define <2 x i8> @masked_add_splat(<2 x i8> %x) { ; CHECK-LABEL: @masked_add_splat( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -64) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[AND]], splat (i8 64) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %and = and <2 x i8> %x, ; 0xc0 @@ -1008,7 +1008,7 @@ define i64 @test41_multiuse_constants_cancel(i32 %a) { define <2 x i64> @test41vec(<2 x i32> %a) { ; CHECK-LABEL: @test41vec( -; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i32> [[A:%.*]], splat (i32 15) ; CHECK-NEXT: [[SUB:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SUB]] ; @@ -1020,10 +1020,10 @@ define <2 x i64> @test41vec(<2 x i32> %a) { define <2 x i64> @test41vec_and_multiuse(<2 x i32> %a) { ; CHECK-LABEL: @test41vec_and_multiuse( -; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i32> [[ADD]] to <2 x i64> -; CHECK-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw <2 x i64> [[ZEXT]], -; CHECK-NEXT: [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[REASS_ADD]], +; CHECK-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw <2 x i64> [[ZEXT]], splat (i64 1) +; CHECK-NEXT: [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[REASS_ADD]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i64> [[EXTRAUSE]] ; %add = add nuw <2 x i32> %a, @@ -1045,7 +1045,7 @@ define i32 @test42(i1 %C) { define <2 x i32> @test42vec(i1 %C) { ; CHECK-LABEL: @test42vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1123), <2 x i32> splat (i32 133) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -1092,7 +1092,7 @@ define <2 x i32> @test43vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 1123), [[ENTRY:%.*]] ], [ splat (i32 133), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A]] ; entry: @@ -1230,7 +1230,7 @@ define i32 @test44_non_matching(i32 %A) { define <2 x i32> @test44_vec(<2 x i32> %A) { ; CHECK-LABEL: @test44_vec( -; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -124) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = or <2 x i32> %A, @@ -1240,8 +1240,8 @@ define <2 x i32> @test44_vec(<2 x i32> %A) { define <2 x i32> @test44_vec_non_matching(<2 x i32> %A) { ; CHECK-LABEL: @test44_vec_non_matching( -; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 123) +; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], splat (i32 -321) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = or <2 x i32> %A, @@ -1301,7 +1301,7 @@ define i5 @and_add(i1 %x, i1 %y) { define <2 x i8> @ashr_add_commute(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @ashr_add_commute( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[TMP3]] @@ -1994,7 +1994,7 @@ define i8 @mul_add_common_factor_commute1(i8 %x, i8 %y) { define <2 x i8> @mul_add_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @mul_add_common_factor_commute2( -; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 1) ; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[M1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[A]] ; @@ -2061,8 +2061,8 @@ define i8 @not_mul(i8 %x) { define <2 x i8> @not_mul_commute(<2 x i8> %p) { ; CHECK-LABEL: @not_mul_commute( ; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[P:%.*]], [[P]] -; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i8> [[X]], -; CHECK-NEXT: [[PLUSX:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i8> [[X]], splat (i8 43) +; CHECK-NEXT: [[PLUSX:%.*]] = add <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i8> [[PLUSX]] ; %x = mul <2 x i8> %p, %p ; thwart complexity-based canonicalization @@ -2134,7 +2134,7 @@ define i8 @full_ashr_inc(i8 %x) { define <2 x i6> @full_ashr_inc_vec(<2 x i6> %x) { ; CHECK-LABEL: @full_ashr_inc_vec( -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i6> [[X:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i6> [[X:%.*]], splat (i6 -1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[ISNOTNEG]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; @@ -2520,7 +2520,7 @@ define i8 @mul_negpow2(i8 %x, i8 %y) { define <2 x i8> @mul_negpow2_commute_vec(<2 x i8> %x, <2 x i8> %p) { ; CHECK-LABEL: @mul_negpow2_commute_vec( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[P:%.*]], [[P]] -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[A:%.*]] = sub <2 x i8> [[Y]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[A]] ; @@ -2704,7 +2704,7 @@ define i5 @zext_zext_not(i3 noundef %x) { define <2 x i5> @zext_zext_not_commute(<2 x i3> noundef %x) { ; CHECK-LABEL: @zext_zext_not_commute( -; CHECK-NEXT: ret <2 x i5> +; CHECK-NEXT: ret <2 x i5> splat (i5 7) ; %zx = zext <2 x i3> %x to <2 x i5> %notx = xor <2 x i3> %x, @@ -2886,7 +2886,7 @@ define i8 @floor_sdiv_by_2_wrong_cast(i8 %x) { define <2 x i32> @floor_sdiv_vec_commute(<2 x i32> %x) { ; CHECK-LABEL: @floor_sdiv_vec_commute( -; CHECK-NEXT: [[R:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %d = sdiv <2 x i32> %x, @@ -3190,7 +3190,7 @@ define i32 @dec_zext_add_nonzero(i8 %x) { define <2 x i32> @dec_zext_add_nonzero_vec(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8) ; CHECK-NEXT: [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[C]] ; @@ -3206,9 +3206,9 @@ define <2 x i32> @dec_zext_add_nonzero_vec(<2 x i8> %x) { define <2 x i32> @dec_zext_add_nonzero_vec_undef0(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec_undef0( ; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[O]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[O]], splat (i8 -1) ; CHECK-NEXT: [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = add nuw nsw <2 x i32> [[B]], +; CHECK-NEXT: [[C:%.*]] = add nuw nsw <2 x i32> [[B]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %o = or <2 x i8> %x, @@ -3233,7 +3233,7 @@ define <2 x i32> @dec_zext_add_nonzero_poison0(<2 x i8> %x) { define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison1( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8) ; CHECK-NEXT: [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[C]] ; @@ -3246,8 +3246,8 @@ define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x i8> %x) { define <2 x i32> @dec_zext_add_nonzero_vec_poison2(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison2( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i8> [[O]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8) +; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i8> [[O]], splat (i8 -1) ; CHECK-NEXT: [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32> ; CHECK-NEXT: [[C:%.*]] = add nuw nsw <2 x i32> [[B]], ; CHECK-NEXT: ret <2 x i32> [[C]] diff --git a/llvm/test/Transforms/InstCombine/add4.ll b/llvm/test/Transforms/InstCombine/add4.ll index 77f7fc7b35cd44..0e97deb4d98ade 100644 --- a/llvm/test/Transforms/InstCombine/add4.ll +++ b/llvm/test/Transforms/InstCombine/add4.ll @@ -19,7 +19,7 @@ define i64 @match_unsigned(i64 %x) { define <2 x i64> @match_unsigned_vector(<2 x i64> %x) { ; CHECK-LABEL: @match_unsigned_vector( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[UREM:%.*]] = urem <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[UREM:%.*]] = urem <2 x i64> [[X:%.*]], splat (i64 19136) ; CHECK-NEXT: ret <2 x i64> [[UREM]] ; bb: @@ -63,7 +63,7 @@ define i64 @match_signed(i64 %x) { define <2 x i64> @match_signed_vector(<2 x i64> %x) { ; CHECK-LABEL: @match_signed_vector( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[SREM1:%.*]] = srem <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[SREM1:%.*]] = srem <2 x i64> [[X:%.*]], splat (i64 172224) ; CHECK-NEXT: ret <2 x i64> [[SREM1]] ; bb: diff --git a/llvm/test/Transforms/InstCombine/add_or_sub.ll b/llvm/test/Transforms/InstCombine/add_or_sub.ll index ef44f036b71fa4..7d005d3cb16f14 100644 --- a/llvm/test/Transforms/InstCombine/add_or_sub.ll +++ b/llvm/test/Transforms/InstCombine/add_or_sub.ll @@ -58,7 +58,7 @@ define i64 @add_or_sub_comb_i64_commuted4(i64 %p) { define <3 x i32> @add_or_sub_comb_i32vec(<3 x i32> %p) { ; CHECK-LABEL: @add_or_sub_comb_i32vec( ; CHECK-NEXT: [[X:%.*]] = mul <3 x i32> [[P:%.*]], [[P]] -; CHECK-NEXT: [[TMP1:%.*]] = add <3 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <3 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[ADD:%.*]] = and <3 x i32> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i32> [[ADD]] ; @@ -72,7 +72,7 @@ define <3 x i32> @add_or_sub_comb_i32vec(<3 x i32> %p) { define <4 x i16> @add_or_sub_comb_i32vec_poison(<4 x i16> %p) { ; CHECK-LABEL: @add_or_sub_comb_i32vec_poison( ; CHECK-NEXT: [[X:%.*]] = mul <4 x i16> [[P:%.*]], [[P]] -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[X]], splat (i16 -1) ; CHECK-NEXT: [[ADD:%.*]] = and <4 x i16> [[TMP1]], [[X]] ; CHECK-NEXT: ret <4 x i16> [[ADD]] ; diff --git a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll index da92ca11a9e0d0..b38466f6cf0ec2 100644 --- a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll +++ b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll @@ -31,7 +31,7 @@ define i32 @add_const_add_const_extrause(i32 %arg) { define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_add_const( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 10) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -41,9 +41,9 @@ define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) { define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_add_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 10) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -89,7 +89,7 @@ define i32 @add_const_sub_const_extrause(i32 %arg) { define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_sub_const( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -99,9 +99,9 @@ define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) { define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_sub_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -260,7 +260,7 @@ define i32 @add_const_const_sub_extrause(i32 %arg) { define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_const_sub( -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 -6), [[ARG:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -270,9 +270,9 @@ define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) { define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_add_const_const_sub_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 -6), [[ARG]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = add <4 x i32> %arg, @@ -318,7 +318,7 @@ define i32 @sub_const_add_const_extrause(i32 %arg) { define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_add_const( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -328,9 +328,9 @@ define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) { define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_add_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 -6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -376,7 +376,7 @@ define i32 @sub_const_sub_const_extrause(i32 %arg) { define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_sub_const( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -10) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -386,9 +386,9 @@ define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) { define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_sub_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 -10) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -434,7 +434,7 @@ define i32 @sub_const_const_sub_extrause(i32 %arg) { define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_const_sub( -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -444,9 +444,9 @@ define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) { define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_sub_const_const_sub_extrause( -; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -8) ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> %arg, @@ -492,7 +492,7 @@ define i32 @const_sub_add_const_extrause(i32 %arg) { define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_add_const( -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -502,9 +502,9 @@ define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) { define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_add_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> splat (i32 8), [[ARG:%.*]] ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -550,7 +550,7 @@ define i32 @const_sub_sub_const_extrause(i32 %arg) { define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_sub_const( -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 6), [[ARG:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -560,9 +560,9 @@ define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) { define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_sub_const_extrause( -; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> splat (i32 8), [[ARG:%.*]] ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> , [[ARG]] +; CHECK-NEXT: [[T1:%.*]] = sub <4 x i32> splat (i32 6), [[ARG]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -609,7 +609,7 @@ define i32 @const_sub_const_sub_extrause(i32 %arg) { define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_const_sub( -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg @@ -619,9 +619,9 @@ define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) { define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) { ; CHECK-LABEL: @vec_const_sub_const_sub_extrause( -; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> , [[ARG:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <4 x i32> splat (i32 8), [[ARG:%.*]] ; CHECK-NEXT: call void @vec_use(<4 x i32> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], +; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 -6) ; CHECK-NEXT: ret <4 x i32> [[T1]] ; %t0 = sub <4 x i32> , %arg diff --git a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll index 76fc7a07be6bd6..0b4720c1a209fb 100644 --- a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll +++ b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll @@ -115,7 +115,7 @@ define i32 @umax3(i32 %n) { define <2 x i32> @umax3_vec(<2 x i32> %n) { ; CHECK-LABEL: @umax3_vec( -; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 5)) ; CHECK-NEXT: ret <2 x i32> [[M]] ; %t = icmp ugt <2 x i32> %n, @@ -139,7 +139,7 @@ define i32 @umin3(i32 %n) { define <2 x i32> @umin3_vec(<2 x i32> %n) { ; CHECK-LABEL: @umin3_vec( -; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 6)) ; CHECK-NEXT: ret <2 x i32> [[M]] ; %t = icmp ult <2 x i32> %n, @@ -211,7 +211,7 @@ define i32 @umax4(i32 %n) { define <2 x i32> @umax4_vec(<2 x i32> %n) { ; CHECK-LABEL: @umax4_vec( -; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 8)) ; CHECK-NEXT: ret <2 x i32> [[M]] ; %t = icmp uge <2 x i32> %n, @@ -235,7 +235,7 @@ define i32 @umin4(i32 %n) { define <2 x i32> @umin4_vec(<2 x i32> %n) { ; CHECK-LABEL: @umin4_vec( -; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 9)) ; CHECK-NEXT: ret <2 x i32> [[M]] ; %t = icmp ule <2 x i32> %n, @@ -305,7 +305,7 @@ define i64 @umax_sext(i32 %a) { define <2 x i64> @umax_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_sext_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: [[MAX:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MAX]] ; @@ -329,7 +329,7 @@ define i64 @umin_sext(i32 %a) { define <2 x i64> @umin_sext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 2)) ; CHECK-NEXT: [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; @@ -353,7 +353,7 @@ define i64 @umax_sext2(i32 %a) { define <2 x i64> @umax_sext2_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_sext2_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 2)) ; CHECK-NEXT: [[MIN:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; @@ -377,7 +377,7 @@ define i64 @umin_sext2(i32 %a) { define <2 x i64> @umin_sext2_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_sext2_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; @@ -401,7 +401,7 @@ define i64 @umax_zext(i32 %a) { define <2 x i64> @umax_zext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umax_zext_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: [[MAX:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MAX]] ; @@ -425,7 +425,7 @@ define i64 @umin_zext(i32 %a) { define <2 x i64> @umin_zext_vec(<2 x i32> %a) { ; CHECK-LABEL: @umin_zext_vec( -; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> ) +; CHECK-NEXT: [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 2)) ; CHECK-NEXT: [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MIN]] ; diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll index 9f8d3e317accc7..c8f93410f9e9c8 100644 --- a/llvm/test/Transforms/InstCombine/and-compare.ll +++ b/llvm/test/Transforms/InstCombine/and-compare.ll @@ -23,7 +23,7 @@ define i1 @test1(i32 %a, i32 %b) { define <2 x i1> @test1vec(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test1vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 65280) ; CHECK-NEXT: [[TMP:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TMP]] ; @@ -46,7 +46,7 @@ define i1 @test2(i64 %A) { define <2 x i1> @test2vec(<2 x i64> %A) { ; CHECK-LABEL: @test2vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[A:%.*]], splat (i64 128) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -68,7 +68,7 @@ define i1 @test3(i64 %A) { define <2 x i1> @test3vec(<2 x i64> %A) { ; CHECK-LABEL: @test3vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[A:%.*]], splat (i64 128) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i64> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll index c163802fcc935c..c03c9d69051edd 100644 --- a/llvm/test/Transforms/InstCombine/and-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/and-fcmp.ll @@ -4644,7 +4644,7 @@ define i1 @clang_builtin_isnormal_inf_check(half %x) { define <2 x i1> @clang_builtin_isnormal_inf_check_vector(<2 x half> %x) { ; CHECK-LABEL: @clang_builtin_isnormal_inf_check_vector( ; CHECK-NEXT: [[FABS_X:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]]) -; CHECK-NEXT: [[AND:%.*]] = fcmp oeq <2 x half> [[FABS_X]], +; CHECK-NEXT: [[AND:%.*]] = fcmp oeq <2 x half> [[FABS_X]], splat (half 0xH7C00) ; CHECK-NEXT: ret <2 x i1> [[AND]] ; %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) diff --git a/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll b/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll index de5de37fe2df64..ee76e0eb105109 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll @@ -139,7 +139,7 @@ define i1 @ne_multi_c2(i8 %x, i8 %y) { define <2 x i1> @eq_vector(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @eq_vector( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -1) ; CHECK-NEXT: [[OR:%.*]] = icmp uge <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; @@ -152,7 +152,7 @@ define <2 x i1> @eq_vector(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @ne_vector_equal_5(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @ne_vector_equal_5( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -6) ; CHECK-NEXT: [[AND:%.*]] = icmp ult <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[AND]] ; @@ -179,7 +179,7 @@ define <2 x i1> @eq_vector_equal_minus_1(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @ne_vector_equal_minus_7(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @ne_vector_equal_minus_7( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 6) ; CHECK-NEXT: [[AND:%.*]] = icmp ult <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[AND]] ; @@ -193,8 +193,8 @@ define <2 x i1> @ne_vector_equal_minus_7(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @eq_vector_unequal1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @eq_vector_unequal1( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[SUB:%.*]] = add <2 x i8> [[X]], -; CHECK-NEXT: [[C1:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[SUB:%.*]] = add <2 x i8> [[X]], splat (i8 -5) +; CHECK-NEXT: [[C1:%.*]] = icmp eq <2 x i8> [[X]], splat (i8 2) ; CHECK-NEXT: [[C2:%.*]] = icmp ugt <2 x i8> [[SUB]], [[Y]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[C1]], [[C2]] ; CHECK-NEXT: ret <2 x i1> [[OR]] @@ -209,8 +209,8 @@ define <2 x i1> @eq_vector_unequal1(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @ne_vector_unequal2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @ne_vector_unequal2( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], -; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], splat (i8 7) +; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X]], splat (i8 -3) ; CHECK-NEXT: [[C2:%.*]] = icmp ule <2 x i8> [[ADD]], [[Y]] ; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[C1]], [[C2]] ; CHECK-NEXT: ret <2 x i1> [[AND]] @@ -228,7 +228,7 @@ define <2 x i1> @ne_vector_unequal2(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @eq_vector_poison_icmp(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @eq_vector_poison_icmp( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -6) ; CHECK-NEXT: [[OR:%.*]] = icmp uge <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; @@ -242,7 +242,7 @@ define <2 x i1> @eq_vector_poison_icmp(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @eq_vector_poison_add(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i1> @eq_vector_poison_add( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -6) ; CHECK-NEXT: [[OR:%.*]] = icmp uge <2 x i8> [[TMP1]], [[Y]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 2b5d430e295757..fffe1f8426690b 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -335,8 +335,8 @@ define i1 @and_ne_with_diff_one_signed_logical(i64 %x) { define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) { ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -33) +; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 65) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %cmp1 = icmp eq <2 x i32> %x, @@ -347,8 +347,8 @@ define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) { define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) { ; CHECK-LABEL: @and_ne_with_diff_one_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -41) +; CHECK-NEXT: [[AND:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -2) ; CHECK-NEXT: ret <2 x i1> [[AND]] ; %cmp1 = icmp ne <2 x i32> %x, @@ -984,7 +984,7 @@ define <2 x i1> @substitute_constant_or_ne_slt_swap_vec_logical(<2 x i8> %x, <2 ; CHECK-LABEL: @substitute_constant_or_ne_slt_swap_vec_logical( ; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X:%.*]], ; CHECK-NEXT: [[C2:%.*]] = icmp slt <2 x i8> [[Y:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C1]], <2 x i1> , <2 x i1> [[C2]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C1]], <2 x i1> splat (i1 true), <2 x i1> [[C2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c1 = icmp ne <2 x i8> %x, @@ -2154,7 +2154,7 @@ define i1 @samesign(i32 %x, i32 %y) { define <2 x i1> @samesign_different_sign_bittest1(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @samesign_different_sign_bittest1( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %a = and <2 x i32> %x, %y @@ -2509,7 +2509,7 @@ define i1 @samesign_inverted_wrong_cmp(i32 %x, i32 %y) { define <2 x i1> @icmp_eq_m1_and_eq_m1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @icmp_eq_m1_and_eq_m1( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rx = icmp eq <2 x i8> %x, @@ -2521,7 +2521,7 @@ define <2 x i1> @icmp_eq_m1_and_eq_m1(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @icmp_eq_m1_and_eq_poison_m1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @icmp_eq_m1_and_eq_poison_m1( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rx = icmp eq <2 x i8> %x, @@ -2571,7 +2571,7 @@ define <2 x i1> @icmp_eq_m1_or_eq_m1_fail(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @icmp_ne_m1_or_ne_m1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @icmp_ne_m1_or_ne_m1( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rx = icmp ne <2 x i8> %x, @@ -2680,7 +2680,7 @@ define <2 x i64> @icmp_slt_0_or_icmp_sgt_0_i64x2(<2 x i64> %x) { define <2 x i64> @icmp_slt_0_or_icmp_sgt_0_i64x2_fail(<2 x i64> %x) { ; CHECK-LABEL: @icmp_slt_0_or_icmp_sgt_0_i64x2_fail( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[X:%.*]], splat (i64 1) ; CHECK-NEXT: [[E:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[E]] ; @@ -2794,7 +2794,7 @@ define i64 @icmp_slt_0_and_icmp_sge_neg1_i64_fail(i64 %x) { define <2 x i32> @icmp_slt_0_and_icmp_sge_neg1_i32x2(<2 x i32> %x) { ; CHECK-LABEL: @icmp_slt_0_and_icmp_sge_neg1_i32x2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -2807,7 +2807,7 @@ define <2 x i32> @icmp_slt_0_and_icmp_sge_neg1_i32x2(<2 x i32> %x) { define <2 x i32> @icmp_slt_0_and_icmp_sge_neg2_i32x2(<2 x i32> %x) { ; CHECK-LABEL: @icmp_slt_0_and_icmp_sge_neg2_i32x2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 -3) ; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -3085,7 +3085,7 @@ entry: define <4 x i1> @logical_and_icmps_vec1(<4 x i32> %a, <4 x i1> %other_cond) { ; CHECK-LABEL: @logical_and_icmps_vec1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], splat (i32 10086) ; CHECK-NEXT: [[RET:%.*]] = select <4 x i1> [[OTHER_COND:%.*]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/and-or-not.ll b/llvm/test/Transforms/InstCombine/and-or-not.ll index 5e6c480df5d103..e06e1b1f7255fc 100644 --- a/llvm/test/Transforms/InstCombine/and-or-not.ll +++ b/llvm/test/Transforms/InstCombine/and-or-not.ll @@ -682,7 +682,7 @@ define i4 @simplify_or_common_op_commute2(i4 %x, i4 %y, i4 %p, i4 %q) { define <2 x i4> @simplify_or_common_op_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %p) { ; CHECK-LABEL: @simplify_or_common_op_commute3( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %z = mul <2 x i4> %p, %p ; thwart complexity-based canonicalization %xy = and <2 x i4> %y, %x diff --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll index fee055a2e12451..2f3e0c2e26ca82 100644 --- a/llvm/test/Transforms/InstCombine/and-or.ll +++ b/llvm/test/Transforms/InstCombine/and-or.ll @@ -60,7 +60,7 @@ define i32 @or_and_not_constant_commute3(i32 %a, i32 %b) { define <2 x i7> @or_and_not_constant_commute0_splat(<2 x i7> %a, <2 x i7> %b) { ; CHECK-LABEL: @or_and_not_constant_commute0_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i7> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i7> [[A:%.*]], splat (i7 42) ; CHECK-NEXT: [[T3:%.*]] = or <2 x i7> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret <2 x i7> [[T3]] ; @@ -118,13 +118,13 @@ define i8 @or_and_or_commute1(i8 %x) { define <2 x i8> @or_and_or_commute1_splat(<2 x i8> %x) { ; CHECK-LABEL: @or_and_or_commute1_splat( -; CHECK-NEXT: [[XN:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[XN:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 16) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[XN]]) -; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], splat (i8 59) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X1]]) -; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], splat (i8 64) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X2]]) -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], splat (i8 123) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %xn = or <2 x i8> %x, @@ -162,14 +162,14 @@ define i8 @or_and_or_commute2(i8 %x, i8 %y) { define <2 x i8> @or_and_or_commute2_splat(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @or_and_or_commute2_splat( -; CHECK-NEXT: [[N:%.*]] = lshr <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[N:%.*]] = lshr <2 x i8> [[Y:%.*]], splat (i8 6) ; CHECK-NEXT: [[XN:%.*]] = or <2 x i8> [[N]], [[X:%.*]] ; CHECK-NEXT: call void @use_vec(<2 x i8> [[XN]]) -; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], splat (i8 -69) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X1]]) -; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], splat (i8 64) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X2]]) -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], splat (i8 -5) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %n = lshr <2 x i8> %y, @@ -234,16 +234,16 @@ define i8 @or_and2_or2(i8 %x) { define <2 x i8> @or_and2_or2_splat(<2 x i8> %x) { ; CHECK-LABEL: @or_and2_or2_splat( -; CHECK-NEXT: [[O1:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[O1:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[O1]]) -; CHECK-NEXT: [[O2:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: [[O2:%.*]] = or <2 x i8> [[X]], splat (i8 2) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[O2]]) -; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[O1]], +; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[O1]], splat (i8 -71) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X1]]) -; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[O2]], +; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[O2]], splat (i8 66) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[X2]]) -; CHECK-NEXT: [[BITFIELD:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i8> [[BITFIELD]], +; CHECK-NEXT: [[BITFIELD:%.*]] = and <2 x i8> [[X]], splat (i8 -8) +; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i8> [[BITFIELD]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %o1 = or <2 x i8> %x, @@ -277,8 +277,8 @@ define i8 @and_or_hoist_mask(i8 %a, i8 %b) { define <2 x i8> @and_xor_hoist_mask_vec_splat(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @and_xor_hoist_mask_vec_splat( -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B_MASKED:%.*]] = and <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], splat (i8 6) +; CHECK-NEXT: [[B_MASKED:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 3) ; CHECK-NEXT: [[AND:%.*]] = xor <2 x i8> [[SH]], [[B_MASKED]] ; CHECK-NEXT: ret <2 x i8> [[AND]] ; @@ -305,9 +305,9 @@ define i8 @and_xor_hoist_mask_commute(i8 %a, i8 %b) { define <2 x i8> @and_or_hoist_mask_commute_vec_splat(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @and_or_hoist_mask_commute_vec_splat( -; CHECK-NEXT: [[C:%.*]] = mul <2 x i8> [[B:%.*]], -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[C_MASKED:%.*]] = and <2 x i8> [[C]], +; CHECK-NEXT: [[C:%.*]] = mul <2 x i8> [[B:%.*]], splat (i8 3) +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], splat (i8 6) +; CHECK-NEXT: [[C_MASKED:%.*]] = and <2 x i8> [[C]], splat (i8 3) ; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[C_MASKED]], [[SH]] ; CHECK-NEXT: ret <2 x i8> [[AND]] ; diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll index 3dbf9af7e19343..5a0890e918ef0f 100644 --- a/llvm/test/Transforms/InstCombine/and-xor-or.ll +++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll @@ -66,7 +66,7 @@ define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) { ; CHECK-SAME: (<2 x i32> [[PA:%.*]], <2 x i32> [[PB:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = udiv <2 x i32> , [[PA]] ; CHECK-NEXT: [[B:%.*]] = udiv <2 x i32> , [[PB]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[B]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[B]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[A]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -83,7 +83,7 @@ define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) { define <4 x i32> @and_xor_common_op_constant(<4 x i32> %A) { ; CHECK-LABEL: define {{[^@]+}}@and_xor_common_op_constant ; CHECK-SAME: (<4 x i32> [[A:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A]], splat (i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -740,9 +740,9 @@ define i32 @not_and_and_not(i32 %a0, i32 %b, i32 %c) { define <4 x i64> @not_and_and_not_4i64(<4 x i64> %a0, <4 x i64> %b, <4 x i64> %c) { ; CHECK-LABEL: define {{[^@]+}}@not_and_and_not_4i64 ; CHECK-SAME: (<4 x i64> [[A0:%.*]], <4 x i64> [[B:%.*]], <4 x i64> [[C:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = sdiv <4 x i64> , [[A0]] +; CHECK-NEXT: [[A:%.*]] = sdiv <4 x i64> splat (i64 42), [[A0]] ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i64> [[B]], [[C]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: [[AND2:%.*]] = and <4 x i64> [[A]], [[TMP2]] ; CHECK-NEXT: ret <4 x i64> [[AND2]] ; @@ -835,9 +835,9 @@ define i32 @not_or_or_not(i32 %a0, i32 %b, i32 %c) { define <2 x i6> @not_or_or_not_2i6(<2 x i6> %a0, <2 x i6> %b, <2 x i6> %c) { ; CHECK-LABEL: define {{[^@]+}}@not_or_or_not_2i6 ; CHECK-SAME: (<2 x i6> [[A0:%.*]], <2 x i6> [[B:%.*]], <2 x i6> [[C:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i6> , [[A0]] +; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i6> splat (i6 3), [[A0]] ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i6> [[B]], [[C]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i6> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i6> [[TMP1]], splat (i6 -1) ; CHECK-NEXT: [[OR2:%.*]] = or <2 x i6> [[A]], [[TMP2]] ; CHECK-NEXT: ret <2 x i6> [[OR2]] ; @@ -4013,7 +4013,7 @@ define i4 @and_orn_xor(i4 %a, i4 %b) { define <2 x i4> @and_orn_xor_commute1(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: define {{[^@]+}}@and_orn_xor_commute1 ; CHECK-SAME: (<2 x i4> [[A:%.*]], <2 x i4> [[B:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[A]], splat (i4 -1) ; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[B]], [[TMP1]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -4392,8 +4392,8 @@ define i32 @canonicalize_logic_first_or0_nswnuw(i32 %x) { define <2 x i32> @canonicalize_logic_first_or_vector0(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_or_vector0 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 15) +; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[TMP1]], splat (i32 112) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add <2 x i32> , %x ; <0x00000070, 0x00000070> @@ -4404,8 +4404,8 @@ define <2 x i32> @canonicalize_logic_first_or_vector0(<2 x i32> %x) { define <2 x i32> @canonicalize_logic_first_or_vector0_nsw(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_or_vector0_nsw ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 15) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], splat (i32 112) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add nsw <2 x i32> , %x ; <0x00000070, 0x00000070> @@ -4416,8 +4416,8 @@ define <2 x i32> @canonicalize_logic_first_or_vector0_nsw(<2 x i32> %x) { define <2 x i32> @canonicalize_logic_first_or_vector0_nswnuw(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_or_vector0_nswnuw ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 15) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], splat (i32 112) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add nsw nuw <2 x i32> , %x ; <0x00000070, 0x00000070> @@ -4538,8 +4538,8 @@ define i8 @canonicalize_logic_first_and0_nswnuw(i8 %x) { define <2 x i8> @canonicalize_logic_first_and_vector0(<2 x i8> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector0 ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -10) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], splat (i8 48) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add <2 x i8> , %x @@ -4550,8 +4550,8 @@ define <2 x i8> @canonicalize_logic_first_and_vector0(<2 x i8> %x) { define <2 x i8> @canonicalize_logic_first_and_vector0_nsw(<2 x i8> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector0_nsw ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -10) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], splat (i8 48) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add nsw <2 x i8> , %x @@ -4562,8 +4562,8 @@ define <2 x i8> @canonicalize_logic_first_and_vector0_nsw(<2 x i8> %x) { define <2 x i8> @canonicalize_logic_first_and_vector0_nswnuw(<2 x i8> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector0_nswnuw ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -10) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[TMP1]], splat (i8 48) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add nsw nuw <2 x i8> , %x @@ -4589,7 +4589,7 @@ define <2 x i8> @canonicalize_logic_first_and_vector1(<2 x i8> %x) { define <2 x i32> @canonicalize_logic_first_and_vector2(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector2 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[X]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[X]], splat (i32 612368384) ; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[A]], ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -4675,8 +4675,8 @@ define i8 @canonicalize_logic_first_xor_0_nswnuw(i8 %x) { define <2 x i32> @canonicalize_logic_first_xor_vector0(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_xor_vector0 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], splat (i32 32783) +; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add <2 x i32> , %x ; <0xFF800000, 0xFF800000> @@ -4687,8 +4687,8 @@ define <2 x i32> @canonicalize_logic_first_xor_vector0(<2 x i32> %x) { define <2 x i32> @canonicalize_logic_first_xor_vector0_nsw(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_xor_vector0_nsw ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], splat (i32 32783) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add nsw <2 x i32> , %x ; <0xFF800000, 0xFF800000> @@ -4699,8 +4699,8 @@ define <2 x i32> @canonicalize_logic_first_xor_vector0_nsw(<2 x i32> %x) { define <2 x i32> @canonicalize_logic_first_xor_vector0_nswnuw(<2 x i32> %x) { ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_xor_vector0_nswnuw ; CHECK-SAME: (<2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X]], splat (i32 32783) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add nsw nuw <2 x i32> , %x ; <0xFF800000, 0xFF800000> diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index 466718c8023007..f0fd0e262a795f 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -322,7 +322,7 @@ define i1 @test18(i32 %A) { define <2 x i1> @test18_vec(<2 x i32> %A) { ; CHECK-LABEL: @test18_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 127) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = and <2 x i32> %A, @@ -342,7 +342,7 @@ define i1 @test18a(i8 %A) { define <2 x i1> @test18a_vec(<2 x i8> %A) { ; CHECK-LABEL: @test18a_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 2) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = and <2 x i8> %A, @@ -396,7 +396,7 @@ define i1 @test23_logical(i32 %A) { define <2 x i1> @test23vec(<2 x i32> %A) { ; CHECK-LABEL: @test23vec( -; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %B = icmp sgt <2 x i32> %A, @@ -455,8 +455,8 @@ define i1 @test25_logical(i32 %A) { define <2 x i1> @test25vec(<2 x i32> %A) { ; CHECK-LABEL: @test25vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -50) +; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 50) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %B = icmp sge <2 x i32> %A, @@ -504,9 +504,9 @@ define i32 @ashr_lowmask_use(i32 %x) { define <2 x i8> @ashr_lowmask_use_splat(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @ashr_lowmask_use_splat( -; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: store <2 x i8> [[A]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i8> [[X]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = ashr <2 x i8> %x, @@ -596,7 +596,7 @@ define i32 @test31(i1 %X) { define <2 x i32> @and_demanded_bits_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @and_demanded_bits_splat_vec( -; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %y = add <2 x i32> %x, @@ -658,7 +658,7 @@ define i32 @test33b(i32 %b) { define <2 x i32> @test33vec(<2 x i32> %b) { ; CHECK-LABEL: @test33vec( -; CHECK-NEXT: [[T13:%.*]] = xor <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[T13:%.*]] = xor <2 x i32> [[B:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T13]] ; %t4.mask = and <2 x i32> %b, @@ -670,7 +670,7 @@ define <2 x i32> @test33vec(<2 x i32> %b) { define <2 x i32> @test33vecb(<2 x i32> %b) { ; CHECK-LABEL: @test33vecb( -; CHECK-NEXT: [[T13:%.*]] = xor <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[T13:%.*]] = xor <2 x i32> [[B:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T13]] ; %t4.mask = and <2 x i32> %b, @@ -716,7 +716,7 @@ define i64 @test35(i32 %X) { define <2 x i64> @test35_uniform(<2 x i32> %X) { ; CHECK-LABEL: @test35_uniform( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 240) ; CHECK-NEXT: [[RES:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -741,8 +741,8 @@ define i64 @test36(i32 %X) { define <2 x i64> @test36_uniform(<2 x i32> %X) { ; CHECK-LABEL: @test36_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 7) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 240) ; CHECK-NEXT: [[RES:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -780,8 +780,8 @@ define i64 @test37(i32 %X) { define <2 x i64> @test37_uniform(<2 x i32> %X) { ; CHECK-LABEL: @test37_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], splat (i32 7) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 240) ; CHECK-NEXT: [[RES:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -988,7 +988,7 @@ define i32 @test40(i1 %C) { define <2 x i32> @test40vec(i1 %C) { ; CHECK-LABEL: @test40vec( -; CHECK-NEXT: [[A:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[A:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 104), <2 x i32> splat (i32 10) ; CHECK-NEXT: ret <2 x i32> [[A]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -1035,7 +1035,7 @@ define <2 x i32> @test41vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 104), [[ENTRY:%.*]] ], [ splat (i32 10), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A]] ; entry: @@ -1448,7 +1448,7 @@ define i32 @lowbitmask_casted_shift_use2(i8 %x) { define <2 x i59> @lowbitmask_casted_shift_vec_splat(<2 x i47> %x) { ; CHECK-LABEL: @lowbitmask_casted_shift_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i47> [[X:%.*]] to <2 x i59> -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i59> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i59> [[TMP1]], splat (i59 5) ; CHECK-NEXT: ret <2 x i59> [[R]] ; %a = ashr <2 x i47> %x, @@ -1525,10 +1525,10 @@ define i32 @not_lowmask_sext_in_reg2(i32 %x) { define <2 x i32> @lowmask_sext_in_reg_splat(<2 x i32> %x, ptr %p) { ; CHECK-LABEL: @lowmask_sext_in_reg_splat( -; CHECK-NEXT: [[L:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i32> [[L]], +; CHECK-NEXT: [[L:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 20) +; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i32> [[L]], splat (i32 20) ; CHECK-NEXT: store <2 x i32> [[R]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X]], splat (i32 4095) ; CHECK-NEXT: ret <2 x i32> [[AND]] ; %l = shl <2 x i32> %x, @@ -1576,9 +1576,9 @@ define i8 @lowmask_add_2_uses(i8 %x) { define <2 x i8> @lowmask_add_2_splat(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @lowmask_add_2_splat( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -64) ; CHECK-NEXT: store <2 x i8> [[A]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], splat (i8 63) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add <2 x i8> %x, ; 0xc0 @@ -1619,9 +1619,9 @@ define i8 @not_lowmask_add2(i8 %x) { define <2 x i8> @lowmask_add_splat(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @lowmask_add_splat( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -64) ; CHECK-NEXT: store <2 x i8> [[A]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], splat (i8 32) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add <2 x i8> %x, ; 0xc0 @@ -1670,8 +1670,8 @@ define i8 @flip_masked_bit(i8 %A) { define <2 x i8> @flip_masked_bit_uniform(<2 x i8> %A) { ; CHECK-LABEL: @flip_masked_bit_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = xor <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 16) +; CHECK-NEXT: [[C:%.*]] = xor <2 x i8> [[TMP1]], splat (i8 16) ; CHECK-NEXT: ret <2 x i8> [[C]] ; %B = add <2 x i8> %A, @@ -1681,7 +1681,7 @@ define <2 x i8> @flip_masked_bit_uniform(<2 x i8> %A) { define <2 x i8> @flip_masked_bit_poison(<2 x i8> %A) { ; CHECK-LABEL: @flip_masked_bit_poison( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[C]] ; @@ -1692,7 +1692,7 @@ define <2 x i8> @flip_masked_bit_poison(<2 x i8> %A) { define <2 x i8> @flip_masked_bit_nonuniform(<2 x i8> %A) { ; CHECK-LABEL: @flip_masked_bit_nonuniform( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[C]] ; @@ -2084,8 +2084,8 @@ define i16 @shl_ashr_pow2_const_case1(i16 %x) { define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_uniform_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], splat (i16 7) +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 8), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> , %x @@ -2123,7 +2123,7 @@ define <3 x i16> @shl_lshr_pow2_const_case1_non_uniform_vec_negative(<3 x i16> % define <3 x i16> @shl_lshr_pow2_const_case1_poison1_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison1_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 8), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> , %x @@ -2135,7 +2135,7 @@ define <3 x i16> @shl_lshr_pow2_const_case1_poison1_vec(<3 x i16> %x) { define <3 x i16> @shl_lshr_pow2_const_case1_poison2_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison2_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 8), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> , %x @@ -2146,8 +2146,8 @@ define <3 x i16> @shl_lshr_pow2_const_case1_poison2_vec(<3 x i16> %x) { define <3 x i16> @shl_lshr_pow2_const_case1_poison3_vec(<3 x i16> %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison3_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> , [[X:%.*]] -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], +; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> splat (i16 16), [[X:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], splat (i16 5) ; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], ; CHECK-NEXT: ret <3 x i16> [[R]] ; @@ -2380,8 +2380,8 @@ define i16 @lshr_shl_pow2_const_negative_oneuse(i16 %x) { define <3 x i16> @lshr_shl_pow2_const_case1_uniform_vec(<3 x i16> %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1_uniform_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], splat (i16 12) +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 128), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %lshr = lshr <3 x i16> , %x @@ -2419,7 +2419,7 @@ define <3 x i16> @lshr_shl_pow2_const_case1_non_uniform_vec_negative(<3 x i16> % define <3 x i16> @lshr_shl_pow2_const_case1_poison1_vec(<3 x i16> %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison1_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> , <3 x i16> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 128), <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %lshr = lshr <3 x i16> , %x @@ -2430,9 +2430,9 @@ define <3 x i16> @lshr_shl_pow2_const_case1_poison1_vec(<3 x i16> %x) { define <3 x i16> @lshr_shl_pow2_const_case1_poison2_vec(<3 x i16> %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison2_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> , [[X:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> splat (i16 8192), [[X:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> [[LSHR]], -; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[SHL]], splat (i16 128) ; CHECK-NEXT: ret <3 x i16> [[R]] ; %lshr = lshr <3 x i16> , %x @@ -2443,8 +2443,8 @@ define <3 x i16> @lshr_shl_pow2_const_case1_poison2_vec(<3 x i16> %x) { define <3 x i16> @lshr_shl_pow2_const_case1_poison3_vec(<3 x i16> %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison3_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> , [[X:%.*]] -; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> [[LSHR]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> splat (i16 8192), [[X:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> [[LSHR]], splat (i16 6) ; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[SHL]], ; CHECK-NEXT: ret <3 x i16> [[R]] ; @@ -2564,7 +2564,7 @@ define i32 @and_zext_multiuse(i32 %a, i1 %b) { define <2 x i32> @and_zext_vec(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_zext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -2811,8 +2811,8 @@ define i32 @add_constant_equal_with_the_top_bit_of_demandedbits_pass(i32 %x) { define <2 x i16> @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector(<2 x i16> %x) { ; CHECK-LABEL: @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[AND:%.*]] = xor <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 24) +; CHECK-NEXT: [[AND:%.*]] = xor <2 x i16> [[TMP1]], splat (i16 16) ; CHECK-NEXT: ret <2 x i16> [[AND]] ; %add = add <2 x i16> %x, diff --git a/llvm/test/Transforms/InstCombine/and2.ll b/llvm/test/Transforms/InstCombine/and2.ll index 104486e7638f56..ce49f4e8f6f12e 100644 --- a/llvm/test/Transforms/InstCombine/and2.ll +++ b/llvm/test/Transforms/InstCombine/and2.ll @@ -83,8 +83,8 @@ define i1 @test8_logical(i32 %i) { define <2 x i1> @test8vec(<2 x i32> %i) { ; CHECK-LABEL: @test8vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[I:%.*]], -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[I:%.*]], splat (i32 -1) +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 13) ; CHECK-NEXT: ret <2 x i1> [[COND]] ; %cmp1 = icmp ne <2 x i32> %i, zeroinitializer @@ -107,7 +107,7 @@ define i64 @test9(i64 %x) { ; combine -x & 1 into x & 1 define <2 x i64> @test9vec(<2 x i64> %x) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[X:%.*]], splat (i64 1) ; CHECK-NEXT: ret <2 x i64> [[AND]] ; %sub = sub nsw <2 x i64> , %x @@ -207,7 +207,7 @@ define i8 @and1_lshr1_is_cmp_eq_0_multiuse(i8 %x) { define <2 x i8> @and1_lshr1_is_cmp_eq_0_vec(<2 x i8> %x) { ; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_vec( -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> splat (i8 1), [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %sh = lshr <2 x i8> , %x diff --git a/llvm/test/Transforms/InstCombine/apint-add.ll b/llvm/test/Transforms/InstCombine/apint-add.ll index 7a6ffed919a414..c4398a61debc06 100644 --- a/llvm/test/Transforms/InstCombine/apint-add.ll +++ b/llvm/test/Transforms/InstCombine/apint-add.ll @@ -36,7 +36,7 @@ define i15 @test3(i15 %x) { ; X + signbit --> X ^ signbit define <2 x i5> @test3vec(<2 x i5> %x) { ; CHECK-LABEL: @test3vec( -; CHECK-NEXT: [[Y:%.*]] = xor <2 x i5> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = xor <2 x i5> [[X:%.*]], splat (i5 -16) ; CHECK-NEXT: ret <2 x i5> [[Y]] ; %y = add <2 x i5> %x, diff --git a/llvm/test/Transforms/InstCombine/apint-mul1.ll b/llvm/test/Transforms/InstCombine/apint-mul1.ll index 2a00275db2606f..7327630c5d0a03 100644 --- a/llvm/test/Transforms/InstCombine/apint-mul1.ll +++ b/llvm/test/Transforms/InstCombine/apint-mul1.ll @@ -15,7 +15,7 @@ define i17 @test1(i17 %X) { define <2 x i17> @test2(<2 x i17> %X) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[Y:%.*]] = shl <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = shl <2 x i17> [[X:%.*]], splat (i17 10) ; CHECK-NEXT: ret <2 x i17> [[Y]] ; %Y = mul <2 x i17> %X, diff --git a/llvm/test/Transforms/InstCombine/apint-mul2.ll b/llvm/test/Transforms/InstCombine/apint-mul2.ll index 12c44755b7e4a3..b3937f0965dcf2 100644 --- a/llvm/test/Transforms/InstCombine/apint-mul2.ll +++ b/llvm/test/Transforms/InstCombine/apint-mul2.ll @@ -16,7 +16,7 @@ define i177 @test1(i177 %X) { define <2 x i177> @test2(<2 x i177> %X) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[Y:%.*]] = shl <2 x i177> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = shl <2 x i177> [[X:%.*]], splat (i177 155) ; CHECK-NEXT: ret <2 x i177> [[Y]] ; %C = shl <2 x i177> , diff --git a/llvm/test/Transforms/InstCombine/apint-select.ll b/llvm/test/Transforms/InstCombine/apint-select.ll index c96c3dc3686935..247be32751ae84 100644 --- a/llvm/test/Transforms/InstCombine/apint-select.ll +++ b/llvm/test/Transforms/InstCombine/apint-select.ll @@ -63,7 +63,7 @@ define <2 x i32> @sext_vec(<2 x i1> %C) { define <2 x i999> @not_zext_vec(<2 x i1> %C) { ; CHECK-LABEL: @not_zext_vec( -; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], +; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], splat (i1 true) ; CHECK-NEXT: [[V:%.*]] = zext <2 x i1> [[NOT_C]] to <2 x i999> ; CHECK-NEXT: ret <2 x i999> [[V]] ; @@ -73,7 +73,7 @@ define <2 x i999> @not_zext_vec(<2 x i1> %C) { define <2 x i64> @not_sext_vec(<2 x i1> %C) { ; CHECK-LABEL: @not_sext_vec( -; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], +; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], splat (i1 true) ; CHECK-NEXT: [[V:%.*]] = sext <2 x i1> [[NOT_C]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[V]] ; @@ -85,7 +85,7 @@ define <2 x i64> @not_sext_vec(<2 x i1> %C) { define <2 x i32> @scalar_select_of_vectors(i1 %c) { ; CHECK-LABEL: @scalar_select_of_vectors( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[V]] ; %V = select i1 %c, <2 x i32> , <2 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll index 21c6c18009d1df..3cc530bdbd0210 100644 --- a/llvm/test/Transforms/InstCombine/apint-shift.ll +++ b/llvm/test/Transforms/InstCombine/apint-shift.ll @@ -95,7 +95,7 @@ define i19 @test10(i19 %X) { define <2 x i19> @lshr_lshr_splat_vec(<2 x i19> %X) { ; CHECK-LABEL: @lshr_lshr_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i19> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i19> [[X:%.*]], splat (i19 5) ; CHECK-NEXT: ret <2 x i19> [[SH1]] ; %sh1 = lshr <2 x i19> %X, @@ -118,8 +118,8 @@ define i9 @multiuse_lshr_lshr(i9 %x) { define <2 x i9> @multiuse_lshr_lshr_splat(<2 x i9> %x) { ; CHECK-LABEL: @multiuse_lshr_lshr_splat( -; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i9> [[X:%.*]], -; CHECK-NEXT: [[SH2:%.*]] = lshr <2 x i9> [[X]], +; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i9> [[X:%.*]], splat (i9 2) +; CHECK-NEXT: [[SH2:%.*]] = lshr <2 x i9> [[X]], splat (i9 5) ; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i9> [[SH1]], [[SH2]] ; CHECK-NEXT: ret <2 x i9> [[MUL]] ; @@ -134,7 +134,7 @@ define <2 x i9> @multiuse_lshr_lshr_splat(<2 x i9> %x) { define <2 x i19> @shl_shl_splat_vec(<2 x i19> %X) { ; CHECK-LABEL: @shl_shl_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i19> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i19> [[X:%.*]], splat (i19 5) ; CHECK-NEXT: ret <2 x i19> [[SH1]] ; %sh1 = shl <2 x i19> %X, @@ -157,8 +157,8 @@ define i42 @multiuse_shl_shl(i42 %x) { define <2 x i42> @multiuse_shl_shl_splat(<2 x i42> %x) { ; CHECK-LABEL: @multiuse_shl_shl_splat( -; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i42> [[X:%.*]], -; CHECK-NEXT: [[SH2:%.*]] = shl <2 x i42> [[X]], +; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i42> [[X:%.*]], splat (i42 8) +; CHECK-NEXT: [[SH2:%.*]] = shl <2 x i42> [[X]], splat (i42 17) ; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i42> [[SH1]], [[SH2]] ; CHECK-NEXT: ret <2 x i42> [[MUL]] ; @@ -173,7 +173,7 @@ define <2 x i42> @multiuse_shl_shl_splat(<2 x i42> %x) { define <2 x i19> @eq_shl_lshr_splat_vec(<2 x i19> %X) { ; CHECK-LABEL: @eq_shl_lshr_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], splat (i19 65535) ; CHECK-NEXT: ret <2 x i19> [[SH1]] ; %sh1 = shl <2 x i19> %X, @@ -186,7 +186,7 @@ define <2 x i19> @eq_shl_lshr_splat_vec(<2 x i19> %X) { define <2 x i19> @eq_lshr_shl_splat_vec(<2 x i19> %X) { ; CHECK-LABEL: @eq_lshr_shl_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], splat (i19 -8) ; CHECK-NEXT: ret <2 x i19> [[SH1]] ; %sh1 = lshr <2 x i19> %X, @@ -199,8 +199,8 @@ define <2 x i19> @eq_lshr_shl_splat_vec(<2 x i19> %X) { define <2 x i7> @lshr_shl_splat_vec(<2 x i7> %X) { ; CHECK-LABEL: @lshr_shl_splat_vec( -; CHECK-NEXT: [[DOTNEG:%.*]] = mul <2 x i7> [[X:%.*]], -; CHECK-NEXT: [[SH2:%.*]] = and <2 x i7> [[DOTNEG]], +; CHECK-NEXT: [[DOTNEG:%.*]] = mul <2 x i7> [[X:%.*]], splat (i7 60) +; CHECK-NEXT: [[SH2:%.*]] = and <2 x i7> [[DOTNEG]], splat (i7 60) ; CHECK-NEXT: ret <2 x i7> [[SH2]] ; %mul = mul <2 x i7> %X, @@ -214,8 +214,8 @@ define <2 x i7> @lshr_shl_splat_vec(<2 x i7> %X) { define <2 x i7> @shl_lshr_splat_vec(<2 x i7> %X) { ; CHECK-LABEL: @shl_lshr_splat_vec( -; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i7> [[X:%.*]], -; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw <2 x i7> [[DIV]], +; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i7> [[X:%.*]], splat (i7 9) +; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw <2 x i7> [[DIV]], splat (i7 1) ; CHECK-NEXT: ret <2 x i7> [[SH1]] ; %div = udiv <2 x i7> %X, @@ -250,7 +250,7 @@ define i47 @test12(i47 %X) { define <2 x i47> @test12_splat_vec(<2 x i47> %X) { ; CHECK-LABEL: @test12_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = and <2 x i47> [[X:%.*]], +; CHECK-NEXT: [[SH1:%.*]] = and <2 x i47> [[X:%.*]], splat (i47 -256) ; CHECK-NEXT: ret <2 x i47> [[SH1]] ; %sh1 = ashr <2 x i47> %X, @@ -328,7 +328,7 @@ define i1 @test16(i84 %X) { define <2 x i1> @test16vec(<2 x i84> %X) { ; CHECK-LABEL: @test16vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], splat (i84 16) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i84> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -375,8 +375,8 @@ define i1 @test17(i106 %A) { define <2 x i1> @test17vec(<2 x i106> %A) { ; CHECK-LABEL: @test17vec( -; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i106> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i106> [[B_MASK]], +; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i106> [[A:%.*]], splat (i106 -8) +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i106> [[B_MASK]], splat (i106 9872) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = lshr <2 x i106> %A, @@ -405,7 +405,7 @@ define i1 @test19(i37 %A) { define <2 x i1> @test19vec(<2 x i37> %A) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i37> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i37> [[A:%.*]], splat (i37 4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i37> %A, @@ -425,7 +425,7 @@ define i1 @test19a(i39 %A) { define <2 x i1> @test19a_vec(<2 x i39> %A) { ; CHECK-LABEL: @test19a_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i39> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i39> [[A:%.*]], splat (i39 -5) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i39> %A, @@ -494,8 +494,8 @@ define i44 @shl_lshr_eq_amt_multi_use(i44 %A) { define <2 x i44> @shl_lshr_eq_amt_multi_use_splat_vec(<2 x i44> %A) { ; CHECK-LABEL: @shl_lshr_eq_amt_multi_use_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i44> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = and <2 x i44> [[A]], +; CHECK-NEXT: [[B:%.*]] = shl <2 x i44> [[A:%.*]], splat (i44 33) +; CHECK-NEXT: [[C:%.*]] = and <2 x i44> [[A]], splat (i44 2047) ; CHECK-NEXT: [[D:%.*]] = or disjoint <2 x i44> [[B]], [[C]] ; CHECK-NEXT: ret <2 x i44> [[D]] ; @@ -524,8 +524,8 @@ define i43 @lshr_shl_eq_amt_multi_use(i43 %A) { define <2 x i43> @lshr_shl_eq_amt_multi_use_splat_vec(<2 x i43> %A) { ; CHECK-LABEL: @lshr_shl_eq_amt_multi_use_splat_vec( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i43> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = and <2 x i43> [[A]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i43> [[A:%.*]], splat (i43 23) +; CHECK-NEXT: [[C:%.*]] = and <2 x i43> [[A]], splat (i43 -8388608) ; CHECK-NEXT: [[D:%.*]] = mul <2 x i43> [[B]], [[C]] ; CHECK-NEXT: ret <2 x i43> [[D]] ; diff --git a/llvm/test/Transforms/InstCombine/ashr-demand.ll b/llvm/test/Transforms/InstCombine/ashr-demand.ll index a0e2af93b809bf..43593612320f64 100644 --- a/llvm/test/Transforms/InstCombine/ashr-demand.ll +++ b/llvm/test/Transforms/InstCombine/ashr-demand.ll @@ -31,8 +31,8 @@ define i32 @srem8_ashr_mask(i32 %a0) { define <2 x i32> @srem2_ashr_mask_vector(<2 x i32> %a0) { ; CHECK-LABEL: @srem2_ashr_mask_vector( -; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SREM]], +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SREM]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MASK]] ; %srem = srem <2 x i32> %a0, @@ -43,8 +43,8 @@ define <2 x i32> @srem2_ashr_mask_vector(<2 x i32> %a0) { define <2 x i32> @srem2_ashr_mask_vector_nonconstant(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @srem2_ashr_mask_vector_nonconstant( -; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SREM]], +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SREM]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MASK]] ; %srem = srem <2 x i32> %a0, diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll index 1abf1be2cbedd9..60debf02b58621 100644 --- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll +++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll @@ -419,7 +419,7 @@ define i32 @ashr_lshr_no_lshr(i32 %x, i32 %y) { define <2 x i32> @ashr_lshr_vec_wrong_pred(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @ashr_lshr_vec_wrong_pred( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[L:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = ashr <2 x i32> [[X]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[L]], <2 x i32> [[R]] diff --git a/llvm/test/Transforms/InstCombine/avg-lsb.ll b/llvm/test/Transforms/InstCombine/avg-lsb.ll index 1e9e4e3bcafb27..f356dd1cab11ef 100644 --- a/llvm/test/Transforms/InstCombine/avg-lsb.ll +++ b/llvm/test/Transforms/InstCombine/avg-lsb.ll @@ -34,7 +34,7 @@ define i8 @avg_lsb_mismatch(i8 %a, i8 %b) { define <2 x i8> @avg_lsb_vector(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: define <2 x i8> @avg_lsb_vector( ; CHECK-SAME: <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) { -; CHECK-NEXT: [[REM:%.*]] = and <2 x i8> [[A]], +; CHECK-NEXT: [[REM:%.*]] = and <2 x i8> [[A]], splat (i8 1) ; CHECK-NEXT: [[DIV2:%.*]] = and <2 x i8> [[B]], [[REM]] ; CHECK-NEXT: ret <2 x i8> [[DIV2]] ; diff --git a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll index 4b5de41fc7095d..75bdf66ab64284 100644 --- a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll +++ b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll @@ -336,7 +336,7 @@ define <2 x i8> @lshr_add_or_fail_dif_masks(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @shl_or_or_good_mask(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @shl_or_or_good_mask( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[BW1:%.*]] = or <2 x i8> [[TMP2]], ; CHECK-NEXT: ret <2 x i8> [[BW1]] ; @@ -350,7 +350,7 @@ define <2 x i8> @shl_or_or_good_mask(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @shl_or_or_fail_bad_mask(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @shl_or_or_fail_bad_mask( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[BW1:%.*]] = or <2 x i8> [[TMP2]], ; CHECK-NEXT: ret <2 x i8> [[BW1]] ; @@ -394,7 +394,7 @@ define <2 x i8> @lshr_or_xor_good_mask(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @lshr_or_xor_good_mask( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i8> [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[BW1:%.*]] = lshr <2 x i8> [[TMP2]], +; CHECK-NEXT: [[BW1:%.*]] = lshr <2 x i8> [[TMP2]], splat (i8 6) ; CHECK-NEXT: ret <2 x i8> [[BW1]] ; %shift1 = lshr <2 x i8> %x, @@ -406,8 +406,8 @@ define <2 x i8> @lshr_or_xor_good_mask(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @lshr_or_xor_fail_bad_mask(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @lshr_or_xor_fail_bad_mask( -; CHECK-NEXT: [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 6) +; CHECK-NEXT: [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]], splat (i8 6) ; CHECK-NEXT: [[BW2:%.*]] = or <2 x i8> [[SHIFT2]], ; CHECK-NEXT: [[BW1:%.*]] = xor <2 x i8> [[SHIFT1]], [[BW2]] ; CHECK-NEXT: ret <2 x i8> [[BW1]] @@ -633,7 +633,7 @@ define i8 @and_ashr_not_fail_invalid_xor_constant(i8 %x, i8 %y, i8 %shamt) { define <4 x i8> @and_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @and_ashr_not_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[AND]] @@ -647,7 +647,7 @@ define <4 x i8> @and_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { define <4 x i8> @and_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @and_ashr_not_vec_commuted( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[AND]] @@ -661,7 +661,7 @@ define <4 x i8> @and_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s define <4 x i8> @and_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @and_ashr_not_vec_poison_1( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[AND]] @@ -767,7 +767,7 @@ define i8 @or_ashr_not_fail_invalid_xor_constant(i8 %x, i8 %y, i8 %shamt) { define <4 x i8> @or_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @or_ashr_not_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[OR]] @@ -781,7 +781,7 @@ define <4 x i8> @or_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { define <4 x i8> @or_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @or_ashr_not_vec_commuted( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[OR]] @@ -795,7 +795,7 @@ define <4 x i8> @or_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %sh define <4 x i8> @or_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @or_ashr_not_vec_poison_1( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] ; CHECK-NEXT: ret <4 x i8> [[OR]] @@ -902,7 +902,7 @@ define <4 x i8> @xor_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { ; CHECK-LABEL: @xor_ashr_not_vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], +; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], splat (i8 -1) ; CHECK-NEXT: ret <4 x i8> [[XOR]] ; %x.shift = ashr <4 x i8> %x, %shamt @@ -916,7 +916,7 @@ define <4 x i8> @xor_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s ; CHECK-LABEL: @xor_ashr_not_vec_commuted( ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], +; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], splat (i8 -1) ; CHECK-NEXT: ret <4 x i8> [[XOR]] ; %x.shift = ashr <4 x i8> %x, %shamt @@ -930,7 +930,7 @@ define <4 x i8> @xor_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s ; CHECK-LABEL: @xor_ashr_not_vec_poison_1( ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], +; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], splat (i8 -1) ; CHECK-NEXT: ret <4 x i8> [[XOR]] ; %x.shift = ashr <4 x i8> %x, %shamt diff --git a/llvm/test/Transforms/InstCombine/binop-cast.ll b/llvm/test/Transforms/InstCombine/binop-cast.ll index 9d3b18c5e79ed5..7330d440982995 100644 --- a/llvm/test/Transforms/InstCombine/binop-cast.ll +++ b/llvm/test/Transforms/InstCombine/binop-cast.ll @@ -151,7 +151,7 @@ define i32 @or_sext_to_sel(i32 %x, i1 %y) { define <2 x i32> @or_sext_to_sel_constant_vec(<2 x i1> %y) { ; CHECK-LABEL: @or_sext_to_sel_constant_vec( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> splat (i32 -1), <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %sext = sext <2 x i1> %y to <2 x i32> @@ -162,7 +162,7 @@ define <2 x i32> @or_sext_to_sel_constant_vec(<2 x i1> %y) { define <2 x i32> @or_sext_to_sel_swap(<2 x i32> %px, <2 x i1> %y) { ; CHECK-LABEL: @or_sext_to_sel_swap( ; CHECK-NEXT: [[X:%.*]] = mul <2 x i32> [[PX:%.*]], [[PX]] -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> , <2 x i32> [[X]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> splat (i32 -1), <2 x i32> [[X]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %x = mul <2 x i32> %px, %px ; thwart complexity-based canonicalization diff --git a/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll b/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll index a16ad4ddb806f6..e10f83f5eee0ba 100644 --- a/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll +++ b/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll @@ -179,7 +179,7 @@ define i8 @shl_or_commuted(i8 %x) { define <2 x i8> @shl_or_splat(<2 x i8> %x) { ; CHECK-LABEL: define <2 x i8> @shl_or_splat ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[BINOP:%.*]] = shl <2 x i8> , [[X]] +; CHECK-NEXT: [[BINOP:%.*]] = shl <2 x i8> splat (i8 22), [[X]] ; CHECK-NEXT: ret <2 x i8> [[BINOP]] ; %shift = shl <2 x i8> , %x diff --git a/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll b/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll index b57b96db306ccc..a3cd0328e9b590 100644 --- a/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll +++ b/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll @@ -130,7 +130,7 @@ define i64 @select_zext_different_condition(i1 %c, i1 %d) { define <2 x i64> @vector_test(i1 %c) { ; CHECK-LABEL: define <2 x i64> @vector_test ; CHECK-SAME: (i1 [[C:%.*]]) { -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], <2 x i64> , <2 x i64> +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], <2 x i64> splat (i64 64), <2 x i64> splat (i64 1) ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[C]] to i64 ; CHECK-NEXT: [[VEC0:%.*]] = insertelement <2 x i64> poison, i64 [[EXT]], i64 0 ; CHECK-NEXT: [[VEC1:%.*]] = shufflevector <2 x i64> [[VEC0]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -215,8 +215,8 @@ define i6 @sub_select_zext_op_swapped_non_const_args(i1 %c, i6 %argT, i6 %argF) define <2 x i8> @vectorized_add(<2 x i1> %c, <2 x i8> %arg) { ; CHECK-LABEL: define <2 x i8> @vectorized_add ; CHECK-SAME: (<2 x i1> [[C:%.*]], <2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[ARG]], -; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[C]], <2 x i8> [[TMP1]], <2 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[ARG]], splat (i8 1) +; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[C]], <2 x i8> [[TMP1]], <2 x i8> splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[ADD]] ; %zext = zext <2 x i1> %c to <2 x i8> diff --git a/llvm/test/Transforms/InstCombine/binop-select.ll b/llvm/test/Transforms/InstCombine/binop-select.ll index 6cd4132eadd77b..25f624ee134126 100644 --- a/llvm/test/Transforms/InstCombine/binop-select.ll +++ b/llvm/test/Transforms/InstCombine/binop-select.ll @@ -113,7 +113,7 @@ define i32 @test_sub_deduce_false(i32 %x, i32 %y) { define <2 x i8> @test_sub_dont_deduce_with_undef_cond_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @test_sub_dont_deduce_with_undef_cond_vec( ; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> , <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> splat (i8 7), <2 x i8> [[Y:%.*]] ; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[X]], <2 x i8> [[COND]]) ; CHECK-NEXT: ret <2 x i8> [[SUB]] ; @@ -126,7 +126,7 @@ define <2 x i8> @test_sub_dont_deduce_with_undef_cond_vec(<2 x i8> %x, <2 x i8> define <2 x i8> @test_sub_dont_deduce_with_poison_cond_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @test_sub_dont_deduce_with_poison_cond_vec( ; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> , <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> splat (i8 7), <2 x i8> [[Y:%.*]] ; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[X]], <2 x i8> [[COND]]) ; CHECK-NEXT: ret <2 x i8> [[SUB]] ; @@ -369,7 +369,7 @@ define <2 x half> @fmul_sel_op1(i1 %b, <2 x half> %p) { define <2 x half> @fmul_sel_op1_use(i1 %b, <2 x half> %p) { ; CHECK-LABEL: @fmul_sel_op1_use( ; CHECK-NEXT: [[X:%.*]] = fadd <2 x half> [[P:%.*]], -; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], <2 x half> zeroinitializer, <2 x half> +; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], <2 x half> zeroinitializer, <2 x half> splat (half 0xHFFFF) ; CHECK-NEXT: call void @use_v2f16(<2 x half> [[S]]) ; CHECK-NEXT: [[R:%.*]] = fmul nnan nsz <2 x half> [[X]], [[S]] ; CHECK-NEXT: ret <2 x half> [[R]] diff --git a/llvm/test/Transforms/InstCombine/bit-checks.ll b/llvm/test/Transforms/InstCombine/bit-checks.ll index 3e3426d951eb9e..43cd6dd1211b00 100644 --- a/llvm/test/Transforms/InstCombine/bit-checks.ll +++ b/llvm/test/Transforms/InstCombine/bit-checks.ll @@ -290,8 +290,8 @@ define i32 @main4(i32 %argc) { define <2 x i32> @main4_splat(<2 x i32> %argc) { ; CHECK-LABEL: @main4_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARGC:%.*]], -; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARGC:%.*]], splat (i32 55) +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 55) ; CHECK-NEXT: [[STOREMERGE:%.*]] = zext <2 x i1> [[AND_COND]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[STOREMERGE]] ; @@ -1356,7 +1356,7 @@ define <2 x i1> @no_masks_with_logical_or_vec_poison1(<2 x i32> %a, <2 x i32> %b ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne <2 x i32> [[B:%.*]], ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[OR2:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> , <2 x i1> [[CMP2]] +; CHECK-NEXT: [[OR2:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> splat (i1 true), <2 x i1> [[CMP2]] ; CHECK-NEXT: ret <2 x i1> [[OR2]] ; %cmp1 = icmp ne <2 x i32> %a, @@ -1371,8 +1371,8 @@ define <2 x i1> @no_masks_with_logical_or_vec_poison2(<2 x i32> %a, <2 x i32> %b ; CHECK-LABEL: @no_masks_with_logical_or_vec_poison2( ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne <2 x i32> [[B:%.*]], ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], -; CHECK-NEXT: [[OR2:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> , <2 x i1> [[CMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[OR2:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> splat (i1 true), <2 x i1> [[CMP2]] ; CHECK-NEXT: ret <2 x i1> [[OR2]] ; %cmp1 = icmp ne <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll index 79665be01576a7..0551a5cb5e2f27 100644 --- a/llvm/test/Transforms/InstCombine/bit_ceil.ll +++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll @@ -268,11 +268,11 @@ define i32 @bit_ceil_32_sub_used_twice(i32 %x, ptr %p) { ; a vector version of @bit_ceil_32 above define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @bit_ceil_v4i32( -; CHECK-NEXT: [[DEC:%.*]] = add <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[DEC:%.*]] = add <4 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[DEC]], i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> zeroinitializer, [[CTLZ]] -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], -; CHECK-NEXT: [[SEL:%.*]] = shl nuw <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], splat (i32 31) +; CHECK-NEXT: [[SEL:%.*]] = shl nuw <4 x i32> splat (i32 1), [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[SEL]] ; %dec = add <4 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/bit_floor.ll b/llvm/test/Transforms/InstCombine/bit_floor.ll index bd8aabf4431c0a..2872221e8aa87c 100644 --- a/llvm/test/Transforms/InstCombine/bit_floor.ll +++ b/llvm/test/Transforms/InstCombine/bit_floor.ll @@ -151,10 +151,10 @@ define i32 @bit_floor_shl_used_twice(i32 %x, ptr %p) { define <4 x i32> @bit_floor_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @bit_floor_v4i32( ; CHECK-NEXT: [[EQ0:%.*]] = icmp eq <4 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X]], splat (i32 1) ; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 1, 33) <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[LSHR]], i1 false) -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <4 x i32> , [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> , [[SUB]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <4 x i32> splat (i32 32), [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> splat (i32 1), [[SUB]] ; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[EQ0]], <4 x i32> zeroinitializer, <4 x i32> [[SHL]] ; CHECK-NEXT: ret <4 x i32> [[SEL]] ; diff --git a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll index 3744d8c9171c7d..49e77009f3b1a0 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll @@ -75,7 +75,7 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) { define <2 x i64> @is_negative(<4 x i32> %x) { ; CHECK-LABEL: @is_negative( -; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[NOTNOT:%.*]] = bitcast <4 x i32> [[X_LOBIT]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[NOTNOT]] ; @@ -91,7 +91,7 @@ define <2 x i64> @is_negative(<4 x i32> %x) { define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) { ; CHECK-LABEL: @is_negative_bonus_bitcast( -; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[X_LOBIT]] ; %lobit = ashr <4 x i32> %x, @@ -107,7 +107,7 @@ define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) { define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) { ; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i8> [[B]] ; %a = and <4 x i4> %x, diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll index 79e6370b7242f5..ce5a4635a9b0a2 100644 --- a/llvm/test/Transforms/InstCombine/bitcast.ll +++ b/llvm/test/Transforms/InstCombine/bitcast.ll @@ -77,7 +77,7 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) { define <2 x i64> @is_negative(<4 x i32> %x) { ; CHECK-LABEL: @is_negative( -; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[NOTNOT:%.*]] = bitcast <4 x i32> [[X_LOBIT]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[NOTNOT]] ; @@ -93,7 +93,7 @@ define <2 x i64> @is_negative(<4 x i32> %x) { define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) { ; CHECK-LABEL: @is_negative_bonus_bitcast( -; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[X_LOBIT]] ; %lobit = ashr <4 x i32> %x, @@ -109,7 +109,7 @@ define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) { define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) { ; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i8> [[B]] ; %a = and <4 x i4> %x, @@ -796,7 +796,7 @@ define double @copysign_idiom_f64(double %x, i64 %mag) { define <2 x float> @copysign_idiom_vec(<2 x float> %x) { ; CHECK-LABEL: @copysign_idiom_vec( -; CHECK-NEXT: [[Y:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[Y:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 1.000000e+00), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[Y]] ; %bits = bitcast <2 x float> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/bitreverse.ll b/llvm/test/Transforms/InstCombine/bitreverse.ll index fe44a7a77bdff8..2d303efbe66c82 100644 --- a/llvm/test/Transforms/InstCombine/bitreverse.ll +++ b/llvm/test/Transforms/InstCombine/bitreverse.ll @@ -362,7 +362,7 @@ define i32 @rev_i1(i1 %x) { define <2 x i8> @rev_v2i1(<2 x i1> %x) { ; CHECK-LABEL: @rev_v2i1( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> , <2 x i8> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> splat (i8 -128), <2 x i8> zeroinitializer ; CHECK-NEXT: ret <2 x i8> [[R]] ; %z = zext <2 x i1> %x to <2 x i8> diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll index ddc0430896e7d1..f7268ec9df0905 100644 --- a/llvm/test/Transforms/InstCombine/bswap-fold.ll +++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll @@ -38,7 +38,7 @@ define i32 @lshr8_i32(i32 %x) { define <2 x i32> @lshr16_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @lshr16_v2i32( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = lshr <2 x i32> %x, @@ -222,7 +222,7 @@ define i16 @test7(i32 %A) { define <2 x i16> @test7_vector(<2 x i32> %A) { ; CHECK-LABEL: @test7_vector( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[D:%.*]] = trunc nuw <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[D]] ; @@ -246,7 +246,7 @@ define i16 @test8(i64 %A) { define <2 x i16> @test8_vector(<2 x i64> %A) { ; CHECK-LABEL: @test8_vector( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 48) ; CHECK-NEXT: [[D:%.*]] = trunc nuw <2 x i64> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[D]] ; @@ -447,7 +447,7 @@ define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 { define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-LABEL: @bs_and32ivec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -1585053440) ; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[T2]] ; @@ -458,7 +458,7 @@ define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 { define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-LABEL: @bs_or32ivec( -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 -1585053440) ; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[T2]] ; @@ -469,7 +469,7 @@ define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 { define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 { ; CHECK-LABEL: @bs_xor32ivec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1585053440) ; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[T2]] ; @@ -837,8 +837,8 @@ define i32 @bs_active_high7(i32 %0) { define <2 x i64> @bs_active_high4(<2 x i64> %0) { ; CHECK-LABEL: @bs_active_high4( -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], splat (i64 4) +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 240) ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %2 = shl <2 x i64> %0, @@ -849,7 +849,7 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) { define <2 x i64> @bs_active_high_different(<2 x i64> %0) { ; CHECK-LABEL: @bs_active_high_different( ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], splat (i64 56) ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %2 = shl <2 x i64> %0, @@ -932,7 +932,7 @@ define i32 @bs_active_byte_3h(i32 %0) { define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) { ; CHECK-LABEL: @bs_active_byte_3h_v2( ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], splat (i32 8) ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %2 = and <2 x i32> %0, ; 0x0080'0000, 0x0001'0000 @@ -966,7 +966,7 @@ define i16 @bs_active_low1(i16 %0) { define <2 x i32> @bs_active_low8(<2 x i32> %0) { ; CHECK-LABEL: @bs_active_low8( -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], splat (i32 24) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %2 = and <2 x i32> %0, @@ -977,7 +977,7 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) { define <2 x i32> @bs_active_low_different(<2 x i32> %0) { ; CHECK-LABEL: @bs_active_low_different( ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], splat (i32 24) ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %2 = and <2 x i32> %0, @@ -1060,7 +1060,7 @@ define i32 @bs_active_byte_2l(i32 %0) { define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) { ; CHECK-LABEL: @bs_active_byte_2l_v2( ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], splat (i64 40) ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %2 = and <2 x i64> %0, ; 0x0100, 0xff00 diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index fa61bc841f0199..44e1616af86c54 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -110,10 +110,10 @@ define <2 x i32> @test2_vector(<2 x i32> %arg) { define <2 x i32> @test2_vector_poison(<2 x i32> %arg) { ; CHECK-LABEL: @test2_vector_poison( ; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[ARG:%.*]], -; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], +; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], splat (i32 8) ; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], ; CHECK-NEXT: [[T6:%.*]] = or disjoint <2 x i32> [[T2]], [[T5]] -; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], splat (i32 8) ; CHECK-NEXT: [[T9:%.*]] = and <2 x i32> [[T8]], ; CHECK-NEXT: [[T10:%.*]] = or disjoint <2 x i32> [[T6]], [[T9]] ; CHECK-NEXT: [[T12:%.*]] = lshr <2 x i32> [[ARG]], diff --git a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll index cde8efbafc5e5a..b7aed751a98977 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll @@ -371,7 +371,7 @@ define i8 @positive_biggershl_shlnuwnsw_ashrexact(i8 %x) { define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @positive_samevar_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[RET]] ; @@ -386,7 +386,7 @@ define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec( -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %tmp0 = ashr <2 x i8> %x, @@ -397,7 +397,7 @@ define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], splat (i8 3) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = ashr <3 x i8> %x, @@ -407,7 +407,7 @@ define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -428,8 +428,8 @@ define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) { define <2 x i8> @positive_biggerashr_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerashr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %tmp0 = ashr <2 x i8> %x, @@ -440,7 +440,7 @@ define <2 x i8> @positive_biggerashr_vec(<2 x i8> %x) { define <3 x i8> @positive_biggerashr_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggerashr_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], splat (i8 3) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = ashr <3 x i8> %x, @@ -450,7 +450,7 @@ define <3 x i8> @positive_biggerashr_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_biggerashr_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggerashr_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], splat (i8 6) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -472,8 +472,8 @@ define <3 x i8> @positive_biggerashr_vec_undef2(<3 x i8> %x) { define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -64) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %tmp0 = ashr <2 x i8> %x, @@ -484,7 +484,7 @@ define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) { define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], splat (i8 6) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = ashr <3 x i8> %x, @@ -494,7 +494,7 @@ define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_biggershl_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[TMP0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -563,7 +563,7 @@ define i8 @positive_biggershl_multiuse(i8 %x) { define <2 x i8> @positive_biggerashr_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerashr_vec_nonsplat( -; CHECK-NEXT: [[TMP0:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[TMP0]], ; CHECK-NEXT: ret <2 x i8> [[RET]] ; @@ -575,7 +575,7 @@ define <2 x i8> @positive_biggerashr_vec_nonsplat(<2 x i8> %x) { define <2 x i8> @positive_biggerLashr_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerLashr_vec_nonsplat( ; CHECK-NEXT: [[TMP0:%.*]] = ashr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[TMP0]], +; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[TMP0]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %tmp0 = ashr <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll index b5ef1f466958d7..42c116f9d8f08c 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll @@ -450,8 +450,8 @@ define i32 @n19_oneuse9(i32 %x, i32 %replacement_low, i32 %replacement_high) { define <2 x i32> @t20_ult_slt_vec_splat(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) { ; CHECK-LABEL: @t20_ult_slt_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 -16) +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], splat (i32 127) ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[REPLACEMENT_HIGH:%.*]], <2 x i32> [[TMP3]] ; CHECK-NEXT: ret <2 x i32> [[R]] @@ -486,9 +486,9 @@ declare void @use2xi1(<2 x i1>) declare void @use(<2 x i1>) define <2 x i32> @t22_uge_slt(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) { ; CHECK-LABEL: @t22_uge_slt( -; CHECK-NEXT: [[T0:%.*]] = icmp slt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[T1:%.*]] = select <2 x i1> [[T0]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[REPLACEMENT_HIGH:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[X]], +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[X]], splat (i32 16) ; CHECK-NEXT: [[T3:%.*]] = icmp uge <2 x i32> [[T2]], ; CHECK-NEXT: call void @use2xi1(<2 x i1> [[T3]]) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[T1]], <2 x i32> [[X]] diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll index c555970ea43484..58f171e3889f66 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll @@ -312,7 +312,7 @@ define i32 @n16_oneuse6(i32 %x, i32 %replacement_low, i32 %replacement_high) { define <2 x i32> @t17_ult_slt_vec_splat(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) { ; CHECK-LABEL: @t17_ult_slt_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[REPLACEMENT_HIGH:%.*]], <2 x i32> [[TMP3]] ; CHECK-NEXT: ret <2 x i32> [[R]] @@ -341,7 +341,7 @@ define <2 x i32> @t18_ult_slt_vec_nonsplat(<2 x i32> %x, <2 x i32> %replacement_ define <3 x i32> @t19_ult_slt_vec_poison0(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { ; CHECK-LABEL: @t19_ult_slt_vec_poison0( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]] ; CHECK-NEXT: ret <3 x i32> [[R]] @@ -355,7 +355,7 @@ define <3 x i32> @t19_ult_slt_vec_poison0(<3 x i32> %x, <3 x i32> %replacement_l define <3 x i32> @t20_ult_slt_vec_poison1(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { ; CHECK-LABEL: @t20_ult_slt_vec_poison1( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]] ; CHECK-NEXT: ret <3 x i32> [[R]] @@ -369,7 +369,7 @@ define <3 x i32> @t20_ult_slt_vec_poison1(<3 x i32> %x, <3 x i32> %replacement_l define <3 x i32> @t21_ult_slt_vec_poison2(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { ; CHECK-LABEL: @t21_ult_slt_vec_poison2( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]] ; CHECK-NEXT: ret <3 x i32> [[R]] diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll index 3d5696a0245139..8a7f1d259fe479 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll @@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, @@ -81,7 +81,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) { define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_splat_poison( -; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %tmp0 = and <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll index 21daeb8983a85d..5f5a393641f205 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll @@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll index e103fe94409869..0a0656a2273cd0 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll @@ -30,7 +30,7 @@ define i1 @p0(i8 %x) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, @@ -60,7 +60,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase(<2 x i8> %x) { define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_splat_poison( -; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %tmp0 = and <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll index bbd733e86a32de..14d634c493780f 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll @@ -39,7 +39,7 @@ define i1 @p0() { define <2 x i1> @p1_vec_splat() { ; CHECK-LABEL: @p1_vec_splat( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8() -; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %x = call <2 x i8> @gen2x8() diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll index b167c8ad25aa94..c563c7c72b9c5c 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll @@ -39,7 +39,7 @@ define i1 @p0() { define <2 x i1> @p1_vec_splat() { ; CHECK-LABEL: @p1_vec_splat( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8() -; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %x = call <2 x i8> @gen2x8() @@ -75,7 +75,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase() { define <3 x i1> @p3_vec_splat_poison() { ; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() -; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll index 82815024477328..a5aa887a0cb6ac 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll @@ -30,7 +30,7 @@ define i1 @p0(i8 %x) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll index 1dac73df387896..c6a69688d0287e 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll @@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, @@ -81,7 +81,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) { define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_splat_poison( -; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %tmp0 = and <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll index e8781407cb647f..1a1a1d08741bed 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll @@ -51,7 +51,7 @@ define i1 @pv(i8 %y) { define <2 x i1> @p1_vec_splat() { ; CHECK-LABEL: @p1_vec_splat( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8() -; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %x = call <2 x i8> @gen2x8() diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll index 54f00321c4cf02..fe9f26f26a1877 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll @@ -51,7 +51,7 @@ define i1 @pv(i8 %y) { define <2 x i1> @p1_vec_splat() { ; CHECK-LABEL: @p1_vec_splat( ; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8() -; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %x = call <2 x i8> @gen2x8() @@ -98,7 +98,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1() { define <3 x i1> @p3_vec_splat_poison() { ; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() -; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X]], +; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll index 7eda7bb58f2700..9a0f073071b17b 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll @@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %tmp0 = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll b/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll index 297dc647e7c67f..a85d7932f9b7ef 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll @@ -180,7 +180,7 @@ define i1 @oge_pinf_fmf(half %x) { define <2 x i1> @olt_pinf_vec(<2 x half> %x) { ; CHECK-LABEL: define <2 x i1> @olt_pinf_vec( ; CHECK-SAME: <2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = fcmp one <2 x half> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp one <2 x half> [[X]], splat (half 0xH7C00) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = fcmp olt <2 x half> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll index b706ff15f569cb..7bc72b8a6c2d9b 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll @@ -44,8 +44,8 @@ define i1 @pb(i65 %x) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 4) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 8) ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %tmp0 = shl <2 x i8> %x, @@ -70,7 +70,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) { define <3 x i1> @p3_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], splat (i8 5) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[TMP2]] ; @@ -82,7 +82,7 @@ define <3 x i1> @p3_vec_undef0(<3 x i8> %x) { define <3 x i1> @p4_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @p4_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[TMP2]] @@ -222,7 +222,7 @@ define i1 @n2(i8 %x, i8 %y) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll index 5a58fc96c6643f..67096e19e9a02f 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll @@ -30,7 +30,7 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[TMP0]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll index edd528b500e557..6f7b5c2a42c2e9 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll @@ -30,7 +30,7 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[TMP0]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll index fd56324f10dc38..8e1e5a56a59d02 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll @@ -42,7 +42,7 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i8> [[X_HIGHBITS]], zeroinitializer @@ -74,7 +74,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { define <3 x i1> @p3_vec_poison0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p3_vec_poison0( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll index 4d8ce5d9a6cca0..8d6dfecffcd14a 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll @@ -42,7 +42,7 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i8> [[X_HIGHBITS]], zeroinitializer @@ -74,7 +74,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { define <3 x i1> @p3_vec_poison0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p3_vec_poison0( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll index 5fab93092a050e..12aa83636223e2 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll @@ -40,9 +40,9 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> , [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[T1]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -58,7 +58,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p2_vec_poison0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i8> , [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> , [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> splat (i8 -1), [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ule <3 x i8> [[X:%.*]], [[T1]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll index 40a67ce1d60cb4..4e840a6fd1b640 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll @@ -40,9 +40,9 @@ define i1 @p0(i8 %x, i8 %y) { define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> , [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[T1]] ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -58,7 +58,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @p2_vec_poison0( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i8> , [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> , [[Y]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> splat (i8 -1), [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <3 x i8> [[X:%.*]], [[T1]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll index 719fdd036a01b6..2806bf69a24f2c 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll @@ -371,7 +371,7 @@ define i8 @positive_biggershl_shlnuwnsw_lshrexact(i8 %x) { define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @positive_samevar_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <2 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[RET]] ; @@ -386,7 +386,7 @@ define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec( -; CHECK-NEXT: [[T0:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i8> [[T0]] ; %t0 = lshr <2 x i8> %x, @@ -397,7 +397,7 @@ define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], splat (i8 3) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %t0 = lshr <3 x i8> %x, @@ -407,7 +407,7 @@ define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -428,8 +428,8 @@ define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) { define <2 x i8> @positive_biggerlshr_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 24) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %t0 = lshr <2 x i8> %x, @@ -440,7 +440,7 @@ define <2 x i8> @positive_biggerlshr_vec(<2 x i8> %x) { define <3 x i8> @positive_biggerlshr_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], splat (i8 3) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %t0 = lshr <3 x i8> %x, @@ -450,7 +450,7 @@ define <3 x i8> @positive_biggerlshr_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_biggerlshr_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], splat (i8 6) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -472,8 +472,8 @@ define <3 x i8> @positive_biggerlshr_vec_undef2(<3 x i8> %x) { define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -64) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %t0 = lshr <2 x i8> %x, @@ -484,7 +484,7 @@ define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) { define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], splat (i8 6) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %t0 = lshr <3 x i8> %x, @@ -494,7 +494,7 @@ define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) { define <3 x i8> @positive_biggershl_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @positive_biggershl_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <3 x i8> [[T0]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; @@ -563,7 +563,7 @@ define i8 @positive_biggershl_multiuse(i8 %x) { define <2 x i8> @positive_biggerlshr_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerlshr_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[T0]], ; CHECK-NEXT: ret <2 x i8> [[RET]] ; @@ -575,7 +575,7 @@ define <2 x i8> @positive_biggerlshr_vec_nonsplat(<2 x i8> %x) { define <2 x i8> @positive_biggerLlshr_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @positive_biggerLlshr_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = shl <2 x i8> [[T0]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %t0 = lshr <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll index c1e871dcccddcf..400d911929b996 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll @@ -124,7 +124,7 @@ define i32 @positive_biggerLshr_shlnuw_lshrexact(i32 %x) { define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @positive_samevar_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> splat (i32 -1), [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -139,7 +139,7 @@ define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec( -; CHECK-NEXT: [[T0:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 134217727) ; CHECK-NEXT: ret <2 x i32> [[T0]] ; %t0 = shl <2 x i32> %x, @@ -150,7 +150,7 @@ define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) { define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], splat (i32 5) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %t0 = shl <3 x i32> %x, @@ -160,7 +160,7 @@ define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) { define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], splat (i32 5) ; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; @@ -181,8 +181,8 @@ define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) { define <2 x i32> @positive_biggerShl_vec(<2 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 5) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], splat (i32 134217696) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %t0 = shl <2 x i32> %x, @@ -193,7 +193,7 @@ define <2 x i32> @positive_biggerShl_vec(<2 x i32> %x) { define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], splat (i32 5) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %t0 = shl <3 x i32> %x, @@ -203,7 +203,7 @@ define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) { define <3 x i32> @positive_biggerShl_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], splat (i32 10) ; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; @@ -225,8 +225,8 @@ define <3 x i32> @positive_biggerShl_vec_undef2(<3 x i32> %x) { define <2 x i32> @positive_biggerLshr_vec(<2 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 5) +; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], splat (i32 4194303) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %t0 = shl <2 x i32> %x, @@ -237,7 +237,7 @@ define <2 x i32> @positive_biggerLshr_vec(<2 x i32> %x) { define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshr_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], splat (i32 10) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %t0 = shl <3 x i32> %x, @@ -247,7 +247,7 @@ define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) { define <3 x i32> @positive_biggerLshr_vec_undef1(<3 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshr_vec_undef1( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], splat (i32 5) ; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[T0]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; @@ -346,7 +346,7 @@ define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) { define <2 x i32> @positive_biggerShl_vec_nonsplat(<2 x i32> %x) { ; CHECK-LABEL: @positive_biggerShl_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 5) ; CHECK-NEXT: [[RET:%.*]] = lshr <2 x i32> [[T0]], ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -358,7 +358,7 @@ define <2 x i32> @positive_biggerShl_vec_nonsplat(<2 x i32> %x) { define <2 x i32> @positive_biggerLshl_vec_nonsplat(<2 x i32> %x) { ; CHECK-LABEL: @positive_biggerLshl_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET:%.*]] = lshr <2 x i32> [[T0]], +; CHECK-NEXT: [[RET:%.*]] = lshr <2 x i32> [[T0]], splat (i32 5) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %t0 = shl <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll index b9875fb9780fa9..5a12ffede78a57 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll @@ -44,8 +44,8 @@ define i1 @pb(i65 %x) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -4) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %tmp0 = shl <2 x i8> %x, @@ -70,7 +70,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) { define <3 x i1> @p3_vec_undef0(<3 x i8> %x) { ; CHECK-LABEL: @p3_vec_undef0( ; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], splat (i8 5) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[TMP2]] ; @@ -82,7 +82,7 @@ define <3 x i1> @p3_vec_undef0(<3 x i8> %x) { define <3 x i1> @p4_vec_undef1(<3 x i8> %x) { ; CHECK-LABEL: @p4_vec_undef1( -; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[TMP2]] @@ -222,7 +222,7 @@ define i1 @n2(i8 %x, i8 %y) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %x) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] diff --git a/llvm/test/Transforms/InstCombine/canonicalize.ll b/llvm/test/Transforms/InstCombine/canonicalize.ll index 0905898e2f44bc..d571ae93ec425c 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize.ll @@ -316,7 +316,7 @@ define i1 @canonicalize_ueq_arg_f32(float %x) { define <2 x i1> @canonicalize_ueq_arg_v2f32(<2 x float> %x) { ; CHECK-LABEL: @canonicalize_ueq_arg_v2f32( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %canon.x = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %x) %cmp = fcmp ueq <2 x float> %canon.x, %x diff --git a/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll b/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll index 68a386ec004237..dfefa11b70a020 100644 --- a/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll +++ b/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll @@ -533,7 +533,7 @@ define <2 x i1> @i32_vec_cast_cmp_oeq_vec_int_n0_sitofp(<2 x i32> %i) { define <2 x i1> @i32_vec_cast_cmp_oeq_vec_int_i32imax_sitofp(<2 x i32> %i) { ; CHECK-LABEL: @i32_vec_cast_cmp_oeq_vec_int_i32imax_sitofp( ; CHECK-NEXT: [[F:%.*]] = sitofp <2 x i32> [[I:%.*]] to <2 x float> -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[F]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[F]], splat (float 0x41E0000000000000) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %f = sitofp <2 x i32> %i to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll index 7b6d07a14a30e6..e9f6feba24cf67 100644 --- a/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll +++ b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll @@ -631,7 +631,7 @@ define <3 x i1> @i32_cast_cmp_eq_int_0_sitofp_float_vec_poison(<3 x i32> %i) { define <3 x i1> @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison(<3 x i64> %i) { ; CHECK-LABEL: @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i64> [[I:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i64> [[I:%.*]], splat (i64 1) ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = sitofp <3 x i64> %i to <3 x half> @@ -642,7 +642,7 @@ define <3 x i1> @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison(<3 x i64> %i) { define <3 x i1> @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_poison(<3 x i16> %i) { ; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_poison( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i16> [[I:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i16> [[I:%.*]], splat (i16 -1) ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = sitofp <3 x i16> %i to <3 x float> @@ -659,7 +659,7 @@ define <6 x i1> @i16_cast_cmp_sgt_int_m1_bitcast_vector_num_elements_sitofp(<3 x ; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_bitcast_vector_num_elements_sitofp( ; CHECK-NEXT: [[F:%.*]] = sitofp <3 x i16> [[I:%.*]] to <3 x float> ; CHECK-NEXT: [[B:%.*]] = bitcast <3 x float> [[F]] to <6 x i16> -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <6 x i16> [[B]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <6 x i16> [[B]], splat (i16 -1) ; CHECK-NEXT: ret <6 x i1> [[CMP]] ; %f = sitofp <3 x i16> %i to <3 x float> diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 0e44dc1b8ca9c0..ca748a9483e9b2 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -223,7 +223,7 @@ define <2 x i1> @test19vec(<2 x i32> %X) { define <3 x i1> @test19vec2(<3 x i1> %X) { ; ALL-LABEL: @test19vec2( -; ALL-NEXT: [[CMPEQ:%.*]] = xor <3 x i1> [[X:%.*]], +; ALL-NEXT: [[CMPEQ:%.*]] = xor <3 x i1> [[X:%.*]], splat (i1 true) ; ALL-NEXT: ret <3 x i1> [[CMPEQ]] ; %sext = sext <3 x i1> %X to <3 x i32> @@ -347,8 +347,8 @@ define i1 @test31(i64 %A) { define <2 x i1> @test31vec(<2 x i64> %A) { ; ALL-LABEL: @test31vec( ; ALL-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = and <2 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[C]], +; ALL-NEXT: [[C:%.*]] = and <2 x i32> [[B]], splat (i32 42) +; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[C]], splat (i32 10) ; ALL-NEXT: ret <2 x i1> [[D]] ; %B = trunc <2 x i64> %A to <2 x i32> @@ -362,8 +362,8 @@ define <2 x i1> @test31vec(<2 x i64> %A) { define <2 x i1> @test32vec(<2 x i8> %A) { ; ALL-LABEL: @test32vec( -; ALL-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], -; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i8> [[TMP1]], +; ALL-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 42) +; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 10) ; ALL-NEXT: ret <2 x i1> [[D]] ; %B = zext <2 x i8> %A to <2 x i16> @@ -417,7 +417,7 @@ define i1 @test36(i32 %a) { define <2 x i1> @test36vec(<2 x i32> %a) { ; ALL-LABEL: @test36vec( -; ALL-NEXT: [[D:%.*]] = icmp sgt <2 x i32> [[A:%.*]], +; ALL-NEXT: [[D:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 -1) ; ALL-NEXT: ret <2 x i1> [[D]] ; %b = lshr <2 x i32> %a, @@ -480,8 +480,8 @@ define i16 @test40(i16 %a) { define <2 x i16> @test40vec(<2 x i16> %a) { ; ALL-LABEL: @test40vec( -; ALL-NEXT: [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], -; ALL-NEXT: [[T5:%.*]] = shl <2 x i16> [[A]], +; ALL-NEXT: [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 9) +; ALL-NEXT: [[T5:%.*]] = shl <2 x i16> [[A]], splat (i16 8) ; ALL-NEXT: [[T32:%.*]] = or disjoint <2 x i16> [[T21]], [[T5]] ; ALL-NEXT: ret <2 x i16> [[T32]] ; @@ -615,8 +615,8 @@ define i64 @test46(i64 %A) { define <2 x i64> @test46vec(<2 x i64> %A) { ; ALL-LABEL: @test46vec( ; ALL-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], -; ALL-NEXT: [[D:%.*]] = and <2 x i32> [[C]], +; ALL-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], splat (i32 8) +; ALL-NEXT: [[D:%.*]] = and <2 x i32> [[C]], splat (i32 10752) ; ALL-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[D]] to <2 x i64> ; ALL-NEXT: ret <2 x i64> [[E]] ; @@ -773,7 +773,7 @@ define i64 @test56(i16 %A) { define <2 x i64> @test56vec(<2 x i16> %A) { ; ALL-LABEL: @test56vec( ; ALL-NEXT: [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[P354:%.*]] = lshr <2 x i32> [[P353]], +; ALL-NEXT: [[P354:%.*]] = lshr <2 x i32> [[P353]], splat (i32 5) ; ALL-NEXT: [[P355:%.*]] = zext nneg <2 x i32> [[P354]] to <2 x i64> ; ALL-NEXT: ret <2 x i64> [[P355]] ; @@ -798,7 +798,7 @@ define i64 @test57(i64 %A) { define <2 x i64> @test57vec(<2 x i64> %A) { ; ALL-LABEL: @test57vec( ; ALL-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 8) ; ALL-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[C]] to <2 x i64> ; ALL-NEXT: ret <2 x i64> [[E]] ; @@ -1506,7 +1506,7 @@ define i8 @trunc_lshr_sext_exact(i8 %A) { define <2 x i8> @trunc_lshr_sext_uniform(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_uniform( -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1552,7 +1552,7 @@ define <2 x i8> @trunc_lshr_sext_uses1(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_uses1( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1581,9 +1581,9 @@ define <2 x i8> @trunc_lshr_sext_uses3(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_uses3( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 6) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8 >%A to <2 x i32> @@ -1596,7 +1596,7 @@ define <2 x i8> @trunc_lshr_sext_uses3(<2 x i8> %A) { define <2 x i8> @trunc_lshr_overshift_sext(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift_sext( -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], splat (i8 7) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1622,9 +1622,9 @@ define i8 @trunc_lshr_overshift_sext_uses1(i8 %A) { define <2 x i8> @trunc_lshr_overshift_sext_uses2(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift_sext_uses2( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 8) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A]], splat (i8 7) ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> @@ -1679,7 +1679,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses1(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_sext_wide_input_uses1( ; ALL-NEXT: [[B:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A]], splat (i16 9) ; ALL-NEXT: [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; @@ -1709,7 +1709,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses3(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_sext_wide_input_uses3( ; ALL-NEXT: [[B:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 9) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] @@ -1724,7 +1724,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses3(<2 x i16> %A) { define <2 x i8> @trunc_lshr_overshift_wide_input_sext(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_overshift_wide_input_sext( -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], splat (i16 15) ; ALL-NEXT: [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; @@ -1751,7 +1751,7 @@ define i8 @trunc_lshr_overshift_sext_wide_input_uses1(i16 %A) { define <2 x i8> @trunc_lshr_overshift_sext_wide_input_uses2(<2 x i16> %A) { ; ALL-LABEL: @trunc_lshr_overshift_sext_wide_input_uses2( -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], splat (i16 15) ; ALL-NEXT: [[C:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8> @@ -1797,7 +1797,7 @@ define <2 x i16> @trunc_lshr_sext_narrow_input_uses1(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_narrow_input_uses1( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[A]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[A]], splat (i8 6) ; ALL-NEXT: [[D:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; ALL-NEXT: ret <2 x i16> [[D]] ; @@ -1827,7 +1827,7 @@ define <2 x i16> @trunc_lshr_sext_narrow_input_uses3(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_sext_narrow_input_uses3( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 6) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i16> ; ALL-NEXT: ret <2 x i16> [[D]] @@ -1842,7 +1842,7 @@ define <2 x i16> @trunc_lshr_sext_narrow_input_uses3(<2 x i8> %A) { define <2 x i16> @trunc_lshr_overshift_narrow_input_sext(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift_narrow_input_sext( -; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[A:%.*]], +; ALL-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[A:%.*]], splat (i8 7) ; ALL-NEXT: [[D:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; ALL-NEXT: ret <2 x i16> [[D]] ; @@ -1870,7 +1870,7 @@ define i16 @trunc_lshr_overshift_sext_narrow_input_uses1(i8 %A) { define <2 x i16> @trunc_lshr_overshift_sext_narrow_input_uses2(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift_sext_narrow_input_uses2( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 8) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i16> ; ALL-NEXT: ret <2 x i16> [[D]] @@ -1902,7 +1902,7 @@ define i16 @trunc_lshr_overshift_sext_narrow_input_uses3(i8 %A) { define <2 x i8> @trunc_lshr_overshift2_sext(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift2_sext( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 25) ; ALL-NEXT: [[D:%.*]] = trunc nuw nsw <2 x i32> [[C]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] ; @@ -1930,7 +1930,7 @@ define i8 @trunc_lshr_overshift2_sext_uses1(i8 %A) { define <2 x i8> @trunc_lshr_overshift2_sext_uses2(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_overshift2_sext_uses2( ; ALL-NEXT: [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 25) ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[C]]) ; ALL-NEXT: [[D:%.*]] = trunc nuw nsw <2 x i32> [[C]] to <2 x i8> ; ALL-NEXT: ret <2 x i8> [[D]] @@ -1983,7 +1983,7 @@ define i8 @trunc_lshr_zext_exact(i8 %A) { define <2 x i8> @trunc_lshr_zext_uniform(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_zext_uniform( -; ALL-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[A:%.*]], +; ALL-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[A:%.*]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[TMP1]] ; %B = zext <2 x i8> %A to <2 x i32> @@ -2029,7 +2029,7 @@ define <2 x i8> @trunc_lshr_zext_uses1(<2 x i8> %A) { ; ALL-LABEL: @trunc_lshr_zext_uses1( ; ALL-NEXT: [[B:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32> ; ALL-NEXT: call void @use_v2i32(<2 x i32> [[B]]) -; ALL-NEXT: [[C:%.*]] = lshr <2 x i8> [[A]], +; ALL-NEXT: [[C:%.*]] = lshr <2 x i8> [[A]], splat (i8 6) ; ALL-NEXT: ret <2 x i8> [[C]] ; %B = zext <2 x i8> %A to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll index c6fee0914f0e78..a81259b147fb75 100644 --- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll +++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll @@ -567,8 +567,8 @@ define i32 @mixed_clamp_to_i32_2(float %x) { define <2 x float> @mixed_clamp_to_float_vec(<2 x i32> %x) { ; CHECK-LABEL: @mixed_clamp_to_float_vec( -; CHECK-NEXT: [[SI_MIN:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) -; CHECK-NEXT: [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN]], <2 x i32> ) +; CHECK-NEXT: [[SI_MIN:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 255)) +; CHECK-NEXT: [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN]], <2 x i32> splat (i32 1)) ; CHECK-NEXT: [[R:%.*]] = uitofp nneg <2 x i32> [[R1]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll index 67815e41ecd34e..19c4cc979d4ba5 100644 --- a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll @@ -43,7 +43,7 @@ define i1 @bswap_ne_i32(i32 %x) { define <2 x i1> @bswap_eq_v2i64(<2 x i64> %x) { ; CHECK-LABEL: @bswap_eq_v2i64( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 216172782113783808) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %bs = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %x) @@ -73,7 +73,7 @@ define i1 @ctlz_eq_zero_i32(i32 %x) { define <2 x i1> @ctlz_ne_zero_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_zero_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) @@ -93,7 +93,7 @@ define i1 @ctlz_eq_bw_minus_1_i32(i32 %x) { define <2 x i1> @ctlz_ne_bw_minus_1_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_bw_minus_1_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) @@ -114,8 +114,8 @@ define i1 @ctlz_eq_other_i32(i32 %x) { define <2 x i1> @ctlz_ne_other_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_other_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -128) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 128) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) @@ -211,7 +211,7 @@ define <2 x i1> @ctlz_ult_one_v2i32(<2 x i32> %x) { define <2 x i1> @ctlz_ult_other_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctlz_ult_other_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -223,7 +223,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) { ; CHECK-LABEL: @ctlz_ult_other_multiuse_v2i32( ; CHECK-NEXT: [[LZ:%.*]] = tail call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false) ; CHECK-NEXT: store <2 x i32> [[LZ]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], splat (i32 65535) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -234,7 +234,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) { define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctlz_ult_bw_minus_one_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false) @@ -285,7 +285,7 @@ define i1 @cttz_eq_zero_i33(i33 %x) { define <2 x i1> @cttz_ne_zero_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @cttz_ne_zero_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -306,7 +306,7 @@ define i1 @cttz_eq_bw_minus_1_i33(i33 %x) { define <2 x i1> @cttz_ne_bw_minus_1_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @cttz_ne_bw_minus_1_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) @@ -327,8 +327,8 @@ define i1 @cttz_eq_other_i33(i33 %x) { define <2 x i1> @cttz_ne_other_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @cttz_ne_other_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 31) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) @@ -417,7 +417,7 @@ define <2 x i1> @cttz_ult_one_v2i32(<2 x i32> %x) { define <2 x i1> @cttz_ult_other_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @cttz_ult_other_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65535) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -430,7 +430,7 @@ define <2 x i1> @cttz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) { ; CHECK-LABEL: @cttz_ult_other_multiuse_v2i32( ; CHECK-NEXT: [[TZ:%.*]] = tail call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false) ; CHECK-NEXT: store <2 x i32> [[TZ]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i32> [[TZ]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i32> [[TZ]], splat (i32 16) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false) @@ -441,7 +441,7 @@ define <2 x i1> @cttz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) { define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @cttz_ult_bw_minus_one_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -492,7 +492,7 @@ define i1 @ctpop_eq_bitwidth_i8(i8 %x) { define <2 x i1> @ctpop_ne_bitwidth_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_ne_bitwidth_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x) @@ -515,7 +515,7 @@ define i1 @ctpop_ugt_bitwidth_minus_one_i8(i8 %x, ptr %p) { define <2 x i1> @ctpop_ult_bitwidth_v2i32(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_ult_bitwidth_v2i32( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x) @@ -792,7 +792,7 @@ define i1 @bitreverse_ult_22_fail_not_equality_pred(i8 %x) { define <2 x i1> @bitreverse_vec_eq_2_2(<2 x i8> %x) { ; CHECK-LABEL: @bitreverse_vec_eq_2_2( -; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 64) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> %x) diff --git a/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll b/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll index ed8bfd5d669d9e..dcd79f58390023 100644 --- a/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll @@ -203,7 +203,7 @@ define i1 @class_inf_or_fcmp_issubnormal(half %x) { define <2 x i1> @class_finite_or_fcmp_issubnormal_vector(<2 x half> %x) { ; CHECK-LABEL: @class_finite_or_fcmp_issubnormal_vector( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]]) -; CHECK-NEXT: [[OR:%.*]] = fcmp one <2 x half> [[TMP1]], +; CHECK-NEXT: [[OR:%.*]] = fcmp one <2 x half> [[TMP1]], splat (half 0xH7C00) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) diff --git a/llvm/test/Transforms/InstCombine/compare-signs.ll b/llvm/test/Transforms/InstCombine/compare-signs.ll index ede652869212c1..9703b47b44d0c2 100644 --- a/llvm/test/Transforms/InstCombine/compare-signs.ll +++ b/llvm/test/Transforms/InstCombine/compare-signs.ll @@ -50,7 +50,7 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone { define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec( ; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp sgt <2 x i32> [[T2_UNSHIFTED]], +; CHECK-NEXT: [[T2:%.*]] = icmp sgt <2 x i32> [[T2_UNSHIFTED]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; @@ -64,7 +64,7 @@ define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone { define <2 x i32> @test3vec_poison1(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec_poison1( ; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], +; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], splat (i32 16777216) ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; @@ -78,7 +78,7 @@ define <2 x i32> @test3vec_poison1(<2 x i32> %a, <2 x i32> %b) nounwind readnone define <2 x i32> @test3vec_poison2(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec_poison2( ; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], +; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], splat (i32 131072) ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] ; @@ -93,8 +93,8 @@ define <2 x i32> @test3vec_poison2(<2 x i32> %a, <2 x i32> %b) nounwind readnone define <2 x i32> @test3vec_diff(<2 x i32> %a, <2 x i32> %b) nounwind readnone { ; CHECK-LABEL: @test3vec_diff( -; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 31) +; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], splat (i32 30) ; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]] ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] @@ -154,7 +154,7 @@ define i1 @test4a(i32 %a) { define <2 x i1> @test4a_vec(<2 x i32> %a) { ; CHECK-LABEL: @test4a_vec( -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %l = ashr <2 x i32> %a, @@ -194,7 +194,7 @@ define i1 @test4c(i64 %a) { define <2 x i1> @test4c_vec(<2 x i64> %a) { ; CHECK-LABEL: @test4c_vec( -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i64> [[A:%.*]], splat (i64 1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %l = ashr <2 x i64> %a, @@ -221,11 +221,11 @@ define i1 @shift_trunc_signbit_test(i32 %x) { define <2 x i1> @shift_trunc_signbit_test_vec_uses(<2 x i17> %x, ptr %p1, ptr %p2) { ; CHECK-LABEL: @shift_trunc_signbit_test_vec_uses( -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i17> [[X:%.*]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i17> [[X:%.*]], splat (i17 4) ; CHECK-NEXT: store <2 x i17> [[SH]], ptr [[P1:%.*]], align 8 ; CHECK-NEXT: [[TR:%.*]] = trunc nuw <2 x i17> [[SH]] to <2 x i13> ; CHECK-NEXT: store <2 x i13> [[TR]], ptr [[P2:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i17> [[X]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i17> [[X]], splat (i17 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %sh = lshr <2 x i17> %x, diff --git a/llvm/test/Transforms/InstCombine/compare-udiv.ll b/llvm/test/Transforms/InstCombine/compare-udiv.ll index 20e719cafa6abd..6b8108dc8ecfa9 100644 --- a/llvm/test/Transforms/InstCombine/compare-udiv.ll +++ b/llvm/test/Transforms/InstCombine/compare-udiv.ll @@ -92,7 +92,7 @@ define i1 @test5(i32 %d) { define <2 x i1> @test5vec(<2 x i32> %d) { ; CHECK-LABEL: @test5vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %div = udiv <2 x i32> , %d %cmp1 = icmp ne <2 x i32> %div, zeroinitializer @@ -111,7 +111,7 @@ define i1 @test6(i32 %d) { define <2 x i1> @test6vec(<2 x i32> %d) { ; CHECK-LABEL: @test6vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 6) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -150,7 +150,7 @@ define i1 @test8(i32 %d) { define <2 x i1> @test8vec(<2 x i32> %d) { ; CHECK-LABEL: @test8vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -170,7 +170,7 @@ define i1 @test9(i32 %d) { define <2 x i1> @test9vec(<2 x i32> %d) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -190,7 +190,7 @@ define i1 @test10(i32 %d) { define <2 x i1> @test10vec(<2 x i32> %d) { ; CHECK-LABEL: @test10vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -210,7 +210,7 @@ define i1 @test11(i32 %d) { define <2 x i1> @test11vec(<2 x i32> %d) { ; CHECK-LABEL: @test11vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -230,7 +230,7 @@ define i1 @test12(i32 %d) { define <2 x i1> @test12vec(<2 x i32> %d) { ; CHECK-LABEL: @test12vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -250,7 +250,7 @@ define i1 @test13(i32 %d) { define <2 x i1> @test13vec(<2 x i32> %d) { ; CHECK-LABEL: @test13vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -270,7 +270,7 @@ define i1 @test14(i32 %d) { define <2 x i1> @test14vec(<2 x i32> %d) { ; CHECK-LABEL: @test14vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %div = udiv <2 x i32> , %d @@ -309,7 +309,7 @@ define i1 @test16(i32 %d) { define <2 x i1> @test16vec(<2 x i32> %d) { ; CHECK-LABEL: @test16vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %div = udiv <2 x i32> , %d %cmp1 = icmp ult <2 x i32> %div, diff --git a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll index 22a7c0c072d963..3e5468d2f8bed5 100644 --- a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll +++ b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll @@ -62,7 +62,7 @@ define ptr addrspace(1) @fold_2x_smaller_index_type(ptr addrspace(1) %p, i32 %m0 define <2 x ptr> @fold_2x_vec_i64(<2 x ptr> %p, <2 x i64> %m0) { ; CHECK-LABEL: define <2 x ptr> @fold_2x_vec_i64 ; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M0:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M0]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M0]], splat (i64 -2) ; CHECK-NEXT: [[P1:%.*]] = call align 2 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[TMP1]]) ; CHECK-NEXT: ret <2 x ptr> [[P1]] ; diff --git a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll index a63eeab6a4b1e9..ce3355b6df039e 100644 --- a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll +++ b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll @@ -333,7 +333,7 @@ entry: define <4 x half> @copysign_splat(<4 x half> %a) { ; CHECK-LABEL: @copysign_splat( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RET:%.*]] = call <4 x half> @llvm.copysign.v4f16(<4 x half> , <4 x half> [[A:%.*]]) +; CHECK-NEXT: [[RET:%.*]] = call <4 x half> @llvm.copysign.v4f16(<4 x half> splat (half 0xH3C00), <4 x half> [[A:%.*]]) ; CHECK-NEXT: ret <4 x half> [[RET]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll b/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll index 12c608cd6c2bb9..74a1e318d77ede 100644 --- a/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll @@ -1779,7 +1779,7 @@ define i1 @not_isfinite_or_zero_f16_daz(half %x) #1 { define <2 x i1> @not_isfinite_or_zero_v2f16_daz(<2 x half> %x) #1 { ; CHECK-LABEL: @not_isfinite_or_zero_v2f16_daz( ; CHECK-NEXT: [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]]) -; CHECK-NEXT: [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], +; CHECK-NEXT: [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], splat (half 0xH7C00) ; CHECK-NEXT: [[CMPZERO:%.*]] = fcmp oeq <2 x half> [[X]], zeroinitializer ; CHECK-NEXT: [[CLASS:%.*]] = or <2 x i1> [[CMPZERO]], [[CMPINF]] ; CHECK-NEXT: ret <2 x i1> [[CLASS]] @@ -1810,7 +1810,7 @@ define i1 @not_isfinite_or_zero_f16_dynamic(half %x) #2 { define <2 x i1> @not_isfinite_or_zero_v2f16_dynamic(<2 x half> %x) #2 { ; CHECK-LABEL: @not_isfinite_or_zero_v2f16_dynamic( ; CHECK-NEXT: [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]]) -; CHECK-NEXT: [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], +; CHECK-NEXT: [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], splat (half 0xH7C00) ; CHECK-NEXT: [[CMPZERO:%.*]] = fcmp oeq <2 x half> [[X]], zeroinitializer ; CHECK-NEXT: [[CLASS:%.*]] = or <2 x i1> [[CMPZERO]], [[CMPINF]] ; CHECK-NEXT: ret <2 x i1> [[CLASS]] diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll index 15aa87f72c49a1..bdceeb5ef9785c 100644 --- a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll +++ b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll @@ -64,7 +64,7 @@ define i32 @shl_cttz_true(i32) { define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) { ; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_true( ; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], +; CHECK-NEXT: [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], splat (i32 9) ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] ; %div = lshr <2 x i32> , %0 @@ -75,7 +75,7 @@ define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) { define <2 x i32> @vec2_shl_nuw_ctlz_true(<2 x i32>) { ; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_ctlz_true( ; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> , [[TMP0]] +; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> splat (i32 9), [[TMP0]] ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] ; %shl = shl nuw <2 x i32> , %0 @@ -86,7 +86,7 @@ define <2 x i32> @vec2_shl_nuw_ctlz_true(<2 x i32>) { define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true(<2 x i32>) { ; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true( ; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) { -; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> , [[TMP0]] +; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> splat (i32 9), [[TMP0]] ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] ; %shl = shl nuw nsw <2 x i32> , %0 diff --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll index bcfbce8dfd3d22..4c5d7a7dc011ba 100644 --- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll +++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll @@ -21,7 +21,7 @@ define i32 @ctpop1(i32 %0) { define <2 x i32> @ctpop1v(<2 x i32> %0) { ; CHECK-LABEL: @ctpop1v( ; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false) -; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw <2 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw <2 x i32> splat (i32 32), [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %2 = sub <2 x i32> zeroinitializer, %0 diff --git a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll index 17997b25d096c1..e86d32de51f367 100644 --- a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll +++ b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll @@ -102,7 +102,7 @@ define <2 x i32> @ctpop_shl2_1_vec(<2 x i32> %x) { define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_lshr_intmin_intmin_plus1_vec_nz( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> , [[X1]] ; CHECK-NEXT: [[CNT:%.*]] = call range(i32 1, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]]) ; CHECK-NEXT: ret <2 x i32> [[CNT]] @@ -116,7 +116,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) { define <2 x i32> @ctpop_shl2_1_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_shl2_1_vec_nz( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %and = and <2 x i32> %x, %shl = shl <2 x i32> , %and @@ -140,7 +140,7 @@ define <2 x i64> @ctpop_x_and_negx_vec(<2 x i64> %x) { define <2 x i32> @ctpop_x_and_negx_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_x_and_negx_vec_nz( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %x1 = or <2 x i32> %x, %sub = sub <2 x i32> , %x1 diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll index 940bb868c4c615..217a3c308a2ddb 100644 --- a/llvm/test/Transforms/InstCombine/ctpop.ll +++ b/llvm/test/Transforms/InstCombine/ctpop.ll @@ -105,9 +105,9 @@ define i8 @mask_one_bit(i8 %x) { define <2 x i32> @mask_one_bit_splat(<2 x i32> %x, ptr %p) { ; CHECK-LABEL: @mask_one_bit_splat( -; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2048) ; CHECK-NEXT: store <2 x i32> [[A]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i32> [[A]], +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i32> [[A]], splat (i32 11) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = and <2 x i32> %x, @@ -146,7 +146,7 @@ define i7 @_parity_of_not_odd_type(i7 %x) { define <2 x i32> @_parity_of_not_vec(<2 x i32> %x) { ; CHECK-LABEL: @_parity_of_not_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %neg = xor <2 x i32> %x, @@ -158,7 +158,7 @@ define <2 x i32> @_parity_of_not_vec(<2 x i32> %x) { define <2 x i32> @_parity_of_not_poison(<2 x i32> %x) { ; CHECK-LABEL: @_parity_of_not_poison( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %neg = xor <2 x i32> %x, @@ -213,7 +213,7 @@ define i32 @ctpop_add_no_common_bits(i32 %a, i32 %b) { define <2 x i32> @ctpop_add_no_common_bits_vec(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @ctpop_add_no_common_bits_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> splat (i32 16)) ; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[RES]] ; @@ -227,9 +227,9 @@ define <2 x i32> @ctpop_add_no_common_bits_vec(<2 x i32> %a, <2 x i32> %b) { define <2 x i32> @ctpop_add_no_common_bits_vec_use(<2 x i32> %a, <2 x i32> %b, ptr %p) { ; CHECK-LABEL: @ctpop_add_no_common_bits_vec_use( -; CHECK-NEXT: [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[CTPOP1:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL16]]) -; CHECK-NEXT: [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], splat (i32 16) ; CHECK-NEXT: [[CTPOP2:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[LSHL16]]) ; CHECK-NEXT: store <2 x i32> [[CTPOP2]], ptr [[P:%.*]], align 8 ; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i32> [[CTPOP1]], [[CTPOP2]] @@ -246,10 +246,10 @@ define <2 x i32> @ctpop_add_no_common_bits_vec_use(<2 x i32> %a, <2 x i32> %b, p define <2 x i32> @ctpop_add_no_common_bits_vec_use2(<2 x i32> %a, <2 x i32> %b, ptr %p) { ; CHECK-LABEL: @ctpop_add_no_common_bits_vec_use2( -; CHECK-NEXT: [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[CTPOP1:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL16]]) ; CHECK-NEXT: store <2 x i32> [[CTPOP1]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], splat (i32 16) ; CHECK-NEXT: [[CTPOP2:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[LSHL16]]) ; CHECK-NEXT: [[RES:%.*]] = add nuw nsw <2 x i32> [[CTPOP1]], [[CTPOP2]] ; CHECK-NEXT: ret <2 x i32> [[RES]] @@ -321,7 +321,7 @@ define i8 @sub_ctpop_unknown(i8 %a, i8 %b) { define <2 x i32> @sub_ctpop_vec(<2 x i32> %a) { ; CHECK-LABEL: @sub_ctpop_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x i32> [[RES]] ; @@ -334,7 +334,7 @@ define <2 x i32> @sub_ctpop_vec_extra_use(<2 x i32> %a, ptr %p) { ; CHECK-LABEL: @sub_ctpop_vec_extra_use( ; CHECK-NEXT: [[CNT:%.*]] = tail call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[A:%.*]]) ; CHECK-NEXT: store <2 x i32> [[CNT]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[RES:%.*]] = sub nuw nsw <2 x i32> , [[CNT]] +; CHECK-NEXT: [[RES:%.*]] = sub nuw nsw <2 x i32> splat (i32 32), [[CNT]] ; CHECK-NEXT: ret <2 x i32> [[RES]] ; %cnt = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a) @@ -412,7 +412,7 @@ define <2 x i32> @parity_xor_vec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @parity_xor_vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[ARG1:%.*]], [[ARG:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]]) -; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[TMP2]], +; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[I4]] ; %i = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %arg) diff --git a/llvm/test/Transforms/InstCombine/demorgan.ll b/llvm/test/Transforms/InstCombine/demorgan.ll index 460758d512bb38..fa44694b278fa7 100644 --- a/llvm/test/Transforms/InstCombine/demorgan.ll +++ b/llvm/test/Transforms/InstCombine/demorgan.ll @@ -428,7 +428,7 @@ define i32 @demorgan_and_zext(i1 %X, i1 %Y) { define <2 x i32> @demorgan_or_zext_vec(<2 x i1> %X, <2 x i1> %Y) { ; CHECK-LABEL: @demorgan_or_zext_vec( ; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and <2 x i1> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i1> [[OR1_DEMORGAN]], +; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i1> [[OR1_DEMORGAN]], splat (i1 true) ; CHECK-NEXT: [[OR:%.*]] = zext <2 x i1> [[OR1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[OR]] ; @@ -443,7 +443,7 @@ define <2 x i32> @demorgan_or_zext_vec(<2 x i1> %X, <2 x i1> %Y) { define <2 x i32> @demorgan_and_zext_vec(<2 x i1> %X, <2 x i1> %Y) { ; CHECK-LABEL: @demorgan_and_zext_vec( ; CHECK-NEXT: [[AND1_DEMORGAN:%.*]] = or <2 x i1> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[AND1:%.*]] = xor <2 x i1> [[AND1_DEMORGAN]], +; CHECK-NEXT: [[AND1:%.*]] = xor <2 x i1> [[AND1_DEMORGAN]], splat (i1 true) ; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[AND1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[AND]] ; diff --git a/llvm/test/Transforms/InstCombine/dependent-ivs.ll b/llvm/test/Transforms/InstCombine/dependent-ivs.ll index e4a042ff5fe515..218e619643dfbb 100644 --- a/llvm/test/Transforms/InstCombine/dependent-ivs.ll +++ b/llvm/test/Transforms/InstCombine/dependent-ivs.ll @@ -167,7 +167,7 @@ define void @int_iv_vector(<2 x i64> %base) { ; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV2:%.*]] = add <2 x i64> [[IV]], [[BASE]] ; CHECK-NEXT: call void @use.v2i64(<2 x i64> [[IV2]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], splat (i64 4) ; CHECK-NEXT: [[CMP:%.*]] = call i1 @get.i1() ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: @@ -198,7 +198,7 @@ define void @int_iv_vector_poison_invalid(<2 x i64> %base) { ; CHECK-NEXT: [[IV2:%.*]] = phi <2 x i64> [ [[IV2_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ , [[ENTRY]] ] ; CHECK-NEXT: call void @use.v2i64(<2 x i64> [[IV2]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], splat (i64 4) ; CHECK-NEXT: [[IV2_NEXT]] = add <2 x i64> [[IV_NEXT]], [[BASE]] ; CHECK-NEXT: [[CMP:%.*]] = call i1 @get.i1() ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] @@ -670,7 +670,7 @@ define void @ptr_iv_vector2(<2 x ptr> %base) { ; CHECK-NEXT: [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_PTR:%.*]] = getelementptr i8, <2 x ptr> [[BASE]], <2 x i64> [[IV]] ; CHECK-NEXT: call void @use.v2p0(<2 x ptr> [[IV_PTR]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], splat (i64 4) ; CHECK-NEXT: [[CMP:%.*]] = call i1 @get.i1() ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: diff --git a/llvm/test/Transforms/InstCombine/div-shift.ll b/llvm/test/Transforms/InstCombine/div-shift.ll index 9610746811a43a..8dd6d4a2e83712 100644 --- a/llvm/test/Transforms/InstCombine/div-shift.ll +++ b/llvm/test/Transforms/InstCombine/div-shift.ll @@ -25,7 +25,7 @@ define i32 @t1(i16 zeroext %x, i32 %y) { define <2 x i32> @t1vec(<2 x i16> %x, <2 x i32> %y) { ; CHECK-LABEL: @t1vec( ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 1) ; CHECK-NEXT: [[D1:%.*]] = lshr <2 x i32> [[CONV]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[D1]] ; @@ -236,7 +236,7 @@ define i32 @t10(i32 %x, i32 %y) { define <2 x i32> @t11(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t11( -; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i32> splat (i32 1), [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %shl = shl nsw <2 x i32> %x, %y @@ -287,7 +287,7 @@ define i32 @t15(i32 %x, i32 %y) { define <2 x i32> @t16(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t16( -; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i32> splat (i32 1), [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %shl = shl nuw <2 x i32> %x, %y @@ -568,7 +568,7 @@ define i5 @udiv_shl_mul_nuw_exact(i5 %x, i5 %y, i5 %z) { define <2 x i4> @udiv_shl_mul_nuw_vec(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { ; CHECK-LABEL: @udiv_shl_mul_nuw_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i4> , [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i4> splat (i4 1), [[Z:%.*]] ; CHECK-NEXT: [[D:%.*]] = udiv <2 x i4> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i4> [[D]] ; diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll index d5d7ce9b7b2636..33a8e12dfa1a68 100644 --- a/llvm/test/Transforms/InstCombine/div.ll +++ b/llvm/test/Transforms/InstCombine/div.ll @@ -88,7 +88,7 @@ define i32 @udiv_by_minus1(i32 %A) { define <2 x i64> @udiv_by_minus1_vec(<2 x i64> %x) { ; CHECK-LABEL: @udiv_by_minus1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -1) ; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[DIV]] ; @@ -109,7 +109,7 @@ define i32 @udiv_by_sext_all_ones(i1 %x, i32 %y) { define <2 x i32> @udiv_by_sext_all_ones_vec(<2 x i1> %x, <2 x i32> %y) { ; CHECK-LABEL: @udiv_by_sext_all_ones_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[Y:%.*]], splat (i32 -1) ; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[DIV]] ; @@ -152,8 +152,8 @@ define i1 @test7(i32 %A) { define <2 x i1> @test7vec(<2 x i32> %A) { ; CHECK-LABEL: @test7vec( -; CHECK-NEXT: [[A_OFF:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A_OFF]], +; CHECK-NEXT: [[A_OFF:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -20) +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A_OFF]], splat (i32 10) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = udiv <2 x i32> %A, @@ -174,7 +174,7 @@ define i1 @test8(i8 %A) { define <2 x i1> @test8vec(<2 x i8> %A) { ; CHECK-LABEL: @test8vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[A:%.*]], splat (i8 -11) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = udiv <2 x i8> %A, @@ -195,7 +195,7 @@ define i1 @test9(i8 %A) { define <2 x i1> @test9vec(<2 x i8> %A) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 -10) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = udiv <2 x i8> %A, @@ -266,7 +266,7 @@ define i32 @test15(i32 %a, i32 %b) { define <2 x i64> @test16(<2 x i64> %x) { ; CHECK-LABEL: @test16( -; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i64> [[X:%.*]], splat (i64 192) ; CHECK-NEXT: ret <2 x i64> [[DIV]] ; %shr = lshr <2 x i64> %x, @@ -286,7 +286,7 @@ define i32 @test19(i32 %x) { define <2 x i32> @test19vec(<2 x i32> %x) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[A:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[A]] ; @@ -309,8 +309,8 @@ define i32 @test20(i32 %x) { define <2 x i32> @test20vec(<2 x i32> %x) { ; CHECK-LABEL: @test20vec( ; CHECK-NEXT: [[X_FR:%.*]] = freeze <2 x i32> [[X:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X_FR]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X_FR]], splat (i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 3) ; CHECK-NEXT: [[A:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[X_FR]], <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[A]] ; @@ -441,7 +441,7 @@ define i32 @test32(i32 %a, i32 %b) { define <2 x i64> @test33(<2 x i64> %x) { ; CHECK-LABEL: @test33( -; CHECK-NEXT: [[DIV:%.*]] = udiv exact <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = udiv exact <2 x i64> [[X:%.*]], splat (i64 192) ; CHECK-NEXT: ret <2 x i64> [[DIV]] ; %shr = lshr exact <2 x i64> %x, @@ -463,7 +463,7 @@ define i8 @sdiv_negated_dividend_constant_divisor(i8 %x) { define <2 x i8> @sdiv_negated_dividend_constant_divisor_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec_splat( -; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: ret <2 x i8> [[D]] ; %neg = sub nsw <2 x i8> zeroinitializer, %x @@ -483,7 +483,7 @@ define i8 @sdiv_exact_negated_dividend_constant_divisor(i8 %x) { define <2 x i8> @sdiv_exact_negated_dividend_constant_divisor_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_exact_negated_dividend_constant_divisor_vec_splat( -; CHECK-NEXT: [[D:%.*]] = sdiv exact <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[D:%.*]] = sdiv exact <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: ret <2 x i8> [[D]] ; %neg = sub nsw <2 x i8> zeroinitializer, %x @@ -504,7 +504,7 @@ define i8 @sdiv_negated_dividend_constant_divisor_smin(i8 %x) { define <2 x i8> @sdiv_negated_dividend_constant_divisor_vec_splat_smin(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec_splat_smin( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; @@ -568,8 +568,8 @@ define i32 @test35(i32 %A) { define <2 x i32> @test35vec(<2 x i32> %A) { ; CHECK-LABEL: @test35vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = udiv exact <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) +; CHECK-NEXT: [[MUL:%.*]] = udiv exact <2 x i32> [[AND]], splat (i32 2147483647) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %and = and <2 x i32> %A, @@ -591,7 +591,7 @@ define i32 @test36(i32 %A) { define <2 x i32> @test36vec(<2 x i32> %A) { ; CHECK-LABEL: @test36vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[MUL:%.*]] = lshr exact <2 x i32> [[AND]], [[A]] ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; @@ -656,7 +656,7 @@ define i32 @zap(i8 %x) { define <3 x i32> @shrink_vec(<3 x i8> %x) { ; CHECK-LABEL: @shrink_vec( -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: [[DIV:%.*]] = sext <3 x i8> [[TMP1]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[DIV]] ; @@ -667,7 +667,7 @@ define <3 x i32> @shrink_vec(<3 x i8> %x) { define <2 x i32> @zap_vec(<2 x i8> %x) { ; CHECK-LABEL: @zap_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[DIV]] ; @@ -740,8 +740,8 @@ define i8 @div_factor_signed(i8 %x, i8 %y) { define <2 x i8> @div_factor_signed_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @div_factor_signed_vec( ; CHECK-NEXT: [[Y_FR:%.*]] = freeze <2 x i8> [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y_FR]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y_FR]], splat (i8 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 3) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i8> [[Y_FR]], <2 x i8> zeroinitializer ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -767,7 +767,7 @@ define i8 @div_factor_unsigned(i8 %x, i8 %y) { define <2 x i8> @div_factor_unsigned_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @div_factor_unsigned_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y:%.*]], splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -874,7 +874,7 @@ define <2 x i64> @test_exact_vec(<2 x i64> %x) { define <2 x i8> @negate_sdiv_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @negate_sdiv_vec_splat( -; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], splat (i8 -42) ; CHECK-NEXT: ret <2 x i8> [[DIV_NEG]] ; %div = sdiv <2 x i8> %x, @@ -909,7 +909,7 @@ define <2 x i8> @negate_sdiv_vec_splat_one(<2 x i8> %x) { define <2 x i8> @negate_sdiv_vec_splat_signed_min(<2 x i8> %x) { ; CHECK-LABEL: @negate_sdiv_vec_splat_signed_min( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[DIV_NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[DIV_NEG]] ; @@ -1033,7 +1033,7 @@ define i8 @sdiv_by_int_min(i8 %x) { define <2 x i8> @sdiv_by_int_min_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_by_int_min_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; @@ -1051,7 +1051,7 @@ define <2 x i8> @sdiv_by_int_min_vec_splat_poison(<2 x i8> %x) { define <2 x i8> @sdiv_by_negconst_v2i8(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_by_negconst_v2i8( -; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[DIV_NEG]] ; %div = sdiv <2 x i8> %x, @@ -1071,7 +1071,7 @@ define @sdiv_by_negconst_nxv2i8( %x) { define <2 x i8> @sdiv_by_minSigned_v2i8(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_by_minSigned_v2i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: [[DIV_NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[DIV_NEG]] ; @@ -1151,7 +1151,7 @@ define i32 @sdiv_constant_dividend_select_divisor2(i1 %b, i32 %x) { define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) { ; CHECK-LABEL: @sdiv_constant_dividend_select_of_constants_divisor_vec( -; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> , <2 x i8> +; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> , <2 x i8> splat (i8 -10) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %s = select i1 %b, <2 x i8> , <2 x i8> @@ -1163,7 +1163,7 @@ define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) { define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1(i1 %b) { ; CHECK-LABEL: @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1( -; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> , <2 x i8> +; CHECK-NEXT: [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> , <2 x i8> splat (i8 -10) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %s = select i1 %b, <2 x i8> , <2 x i8> @@ -1745,7 +1745,7 @@ define i32 @sdiv_distribute_mul_nsw_add_nsw_uses(i32 %x) { define <2 x i6> @udiv_distribute_mul_nuw_add_nuw(<2 x i6> %x) { ; CHECK-LABEL: @udiv_distribute_mul_nuw_add_nuw( -; CHECK-NEXT: [[DIV:%.*]] = add nuw <2 x i6> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = add nuw <2 x i6> [[X:%.*]], splat (i6 5) ; CHECK-NEXT: ret <2 x i6> [[DIV]] ; %mul = mul nuw <2 x i6> %x, diff --git a/llvm/test/Transforms/InstCombine/eq-of-parts.ll b/llvm/test/Transforms/InstCombine/eq-of-parts.ll index afe5d6af1fcd49..00ee7bf643286f 100644 --- a/llvm/test/Transforms/InstCombine/eq-of-parts.ll +++ b/llvm/test/Transforms/InstCombine/eq-of-parts.ll @@ -179,9 +179,9 @@ define i1 @eq_21_comm_eq2(i32 %x, i32 %y) { define <2x i1> @eq_21_vector(<2x i32> %x, <2x i32> %y) { ; CHECK-LABEL: @eq_21_vector( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], splat (i32 8) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16> ; CHECK-NEXT: [[C_210:%.*]] = icmp eq <2 x i16> [[TMP2]], [[TMP4]] ; CHECK-NEXT: ret <2 x i1> [[C_210]] @@ -835,9 +835,9 @@ define i1 @ne_21_comm_ne2(i32 %x, i32 %y) { define <2x i1> @ne_21_vector(<2x i32> %x, <2x i32> %y) { ; CHECK-LABEL: @ne_21_vector( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], splat (i32 8) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16> ; CHECK-NEXT: [[C_210:%.*]] = icmp ne <2 x i16> [[TMP2]], [[TMP4]] ; CHECK-NEXT: ret <2 x i1> [[C_210]] diff --git a/llvm/test/Transforms/InstCombine/exact.ll b/llvm/test/Transforms/InstCombine/exact.ll index ccad6f974ffb1e..c7377ab17d540e 100644 --- a/llvm/test/Transforms/InstCombine/exact.ll +++ b/llvm/test/Transforms/InstCombine/exact.ll @@ -21,7 +21,7 @@ define i32 @sdiv2(i32 %x) { define <2 x i32> @sdiv2_vec(<2 x i32> %x) { ; CHECK-LABEL: @sdiv2_vec( -; CHECK-NEXT: [[Y:%.*]] = ashr exact <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = ashr exact <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[Y]] ; %y = sdiv exact <2 x i32> %x, @@ -105,8 +105,8 @@ define i64 @ashr1(i64 %X) { define <2 x i64> @ashr1_vec(<2 x i64> %X) { ; CHECK-LABEL: @ashr1_vec( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[X:%.*]], splat (i64 8) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], splat (i64 2) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %A = shl <2 x i64> %X, @@ -137,7 +137,7 @@ define i1 @ashr_icmp2(i64 %X) { define <2 x i1> @ashr_icmp2_vec(<2 x i64> %X) { ; CHECK-LABEL: @ashr_icmp2_vec( -; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i64> [[X:%.*]], splat (i64 16) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %Y = ashr exact <2 x i64> %X, @@ -163,10 +163,10 @@ define i1 @pr9998(i32 %V) { ; FIXME: Vectors should fold the same way. define <2 x i1> @pr9998vec(<2 x i32> %V) { ; CHECK-LABEL: @pr9998vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[X:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[Y:%.*]] = sext <2 x i32> [[X]] to <2 x i64> -; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i64> [[Y]], +; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i64> [[Y]], splat (i64 7297771788697658747) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %W = shl <2 x i32> %V, @@ -248,7 +248,7 @@ define i1 @sdiv_icmp2(i64 %X) { define <2 x i1> @sdiv_icmp2_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp2_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 5) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %A = sdiv exact <2 x i64> %X, @@ -268,7 +268,7 @@ define i1 @sdiv_icmp3(i64 %X) { define <2 x i1> @sdiv_icmp3_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp3_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -5) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %A = sdiv exact <2 x i64> %X, @@ -308,7 +308,7 @@ define i1 @sdiv_icmp5(i64 %X) { define <2 x i1> @sdiv_icmp5_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp5_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -5) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %A = sdiv exact <2 x i64> %X, @@ -328,7 +328,7 @@ define i1 @sdiv_icmp6(i64 %X) { define <2 x i1> @sdiv_icmp6_vec(<2 x i64> %X) { ; CHECK-LABEL: @sdiv_icmp6_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 5) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %A = sdiv exact <2 x i64> %X, @@ -402,7 +402,7 @@ define i8 @mul_of_sdiv_fail_ub(i8 %x) { define <2 x i8> @mul_of_sdiv_fail_ub_non_splat(<2 x i8> %x) { ; CHECK-LABEL: @mul_of_sdiv_fail_ub_non_splat( ; CHECK-NEXT: [[DIV:%.*]] = sdiv exact <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i8> [[DIV]], +; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i8> [[DIV]], splat (i8 6) ; CHECK-NEXT: ret <2 x i8> [[MUL]] ; %div = sdiv exact <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/exp2-1.ll b/llvm/test/Transforms/InstCombine/exp2-1.ll index d8bd0a4d8159db..0502540b7f7e92 100644 --- a/llvm/test/Transforms/InstCombine/exp2-1.ll +++ b/llvm/test/Transforms/InstCombine/exp2-1.ll @@ -309,22 +309,22 @@ define float @sitofp_scalar_intrinsic_with_FMF(i8 %x) { define <2 x float> @sitofp_vector_intrinsic_with_FMF(<2 x i8> %x) { ; LDEXP32-LABEL: @sitofp_vector_intrinsic_with_FMF( ; LDEXP32-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32> -; LDEXP32-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; LDEXP32-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; LDEXP32-NEXT: ret <2 x float> [[R]] ; ; LDEXP16-LABEL: @sitofp_vector_intrinsic_with_FMF( ; LDEXP16-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i16> -; LDEXP16-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> , <2 x i16> [[TMP1]]) +; LDEXP16-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> splat (float 1.000000e+00), <2 x i16> [[TMP1]]) ; LDEXP16-NEXT: ret <2 x float> [[R]] ; ; NOLDEXPF-LABEL: @sitofp_vector_intrinsic_with_FMF( ; NOLDEXPF-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32> -; NOLDEXPF-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; NOLDEXPF-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; NOLDEXPF-NEXT: ret <2 x float> [[R]] ; ; NOLDEXP-LABEL: @sitofp_vector_intrinsic_with_FMF( ; NOLDEXP-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32> -; NOLDEXP-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; NOLDEXP-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; NOLDEXP-NEXT: ret <2 x float> [[R]] ; %s = sitofp <2 x i8> %x to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll index 969020140cb339..c2c7ece8483a9e 100644 --- a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll +++ b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll @@ -30,7 +30,7 @@ define <2 x float> @exp2_v2f32_sitofp_v2i8(<2 x i8> %x) { ; CHECK-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8( ; CHECK-SAME: <2 x i8> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32> -; CHECK-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; CHECK-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %itofp = sitofp <2 x i8> %x to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fabs-as-int.ll index 4e49ff159f875d..9d28cae8f04d63 100644 --- a/llvm/test/Transforms/InstCombine/fabs-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fabs-as-int.ll @@ -19,7 +19,7 @@ define <2 x i32> @fabs_as_int_v2f32_noimplicitfloat(<2 x float> %x) noimplicitfl ; CHECK-LABEL: define <2 x i32> @fabs_as_int_v2f32_noimplicitfloat ; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[BC]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[BC]], splat (i32 2147483647) ; CHECK-NEXT: ret <2 x i32> [[AND]] ; %bc = bitcast <2 x float> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/fabs-copysign.ll b/llvm/test/Transforms/InstCombine/fabs-copysign.ll index 438760b76f2039..e1d9f8bf2a4b98 100644 --- a/llvm/test/Transforms/InstCombine/fabs-copysign.ll +++ b/llvm/test/Transforms/InstCombine/fabs-copysign.ll @@ -31,7 +31,7 @@ define double @fabs_copysign_commuted(double %x) { define <4 x double> @fabs_copysign_vec(<4 x double> %x) { ; CHECK-LABEL: @fabs_copysign_vec( -; CHECK-NEXT: [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> , <4 x double> [[X:%.*]]) +; CHECK-NEXT: [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> splat (double 1.000000e+00), <4 x double> [[X:%.*]]) ; CHECK-NEXT: ret <4 x double> [[DIV]] ; %f = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) @@ -41,7 +41,7 @@ define <4 x double> @fabs_copysign_vec(<4 x double> %x) { define <4 x double> @fabs_copysign_vec_commuted(<4 x double> %x) { ; CHECK-LABEL: @fabs_copysign_vec_commuted( -; CHECK-NEXT: [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> , <4 x double> [[X:%.*]]) +; CHECK-NEXT: [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> splat (double 1.000000e+00), <4 x double> [[X:%.*]]) ; CHECK-NEXT: ret <4 x double> [[DIV]] ; %f = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) diff --git a/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll b/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll index a1988e5d1638d7..dd8d0aed3210e1 100644 --- a/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll +++ b/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll @@ -137,7 +137,7 @@ define float @fabs_fneg_no_fabs(float %x) { define <2 x float> @fabs_fneg_splat_v2f32(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @fabs_fneg_splat_v2f32( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 2.000000e+00) ; %neg = fneg <2 x float> %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %neg) diff --git a/llvm/test/Transforms/InstCombine/fadd.ll b/llvm/test/Transforms/InstCombine/fadd.ll index 840ccaef1086ab..36c387969d05d6 100644 --- a/llvm/test/Transforms/InstCombine/fadd.ll +++ b/llvm/test/Transforms/InstCombine/fadd.ll @@ -478,7 +478,7 @@ define float @fadd_fmul_common_op(float %x) { define <2 x float> @fadd_fmul_common_op_vec(<2 x float> %x) { ; CHECK-LABEL: @fadd_fmul_common_op_vec( -; CHECK-NEXT: [[A:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], splat (float 4.300000e+01) ; CHECK-NEXT: ret <2 x float> [[A]] ; %m = fmul reassoc nsz <2 x float> %x, diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll index 32f136d53fab4b..5a80cf56712035 100644 --- a/llvm/test/Transforms/InstCombine/fast-math.ll +++ b/llvm/test/Transforms/InstCombine/fast-math.ll @@ -587,7 +587,7 @@ define float @fdiv2(float %x) { define <2 x float> @fdiv2_vec(<2 x float> %x) { ; CHECK-LABEL: @fdiv2_vec( -; CHECK-NEXT: [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], +; CHECK-NEXT: [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <2 x float> [[DIV1]] ; %mul = fmul <2 x float> %x, diff --git a/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll b/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll index 54dbb09cb8fd31..5a181ead4016ca 100644 --- a/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll +++ b/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll @@ -186,7 +186,7 @@ define <2 x i1> @test_and_olt_poison(<2 x float> %x) { ; CHECK-LABEL: define <2 x i1> @test_and_olt_poison( ; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) -; CHECK-NEXT: [[COND:%.*]] = fcmp olt <2 x float> [[TMP1]], +; CHECK-NEXT: [[COND:%.*]] = fcmp olt <2 x float> [[TMP1]], splat (float 0x3C00000000000000) ; CHECK-NEXT: ret <2 x i1> [[COND]] ; %cmp1 = fcmp olt <2 x float> %x, diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll index 0d45baddcb2fc7..119cffd73c662c 100644 --- a/llvm/test/Transforms/InstCombine/fcmp.ll +++ b/llvm/test/Transforms/InstCombine/fcmp.ll @@ -32,7 +32,7 @@ define i1 @fpext_constant(float %a) { define <2 x i1> @fpext_constant_vec_splat(<2 x half> %a) { ; CHECK-LABEL: @fpext_constant_vec_splat( -; CHECK-NEXT: [[CMP:%.*]] = fcmp nnan ole <2 x half> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp nnan ole <2 x half> [[A:%.*]], splat (half 0xH5140) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %ext = fpext <2 x half> %a to <2 x double> @@ -780,7 +780,7 @@ define i1 @lossy_une(half %x) { define <2 x i1> @lossy_ogt(<2 x float> %x) { ; CHECK-LABEL: @lossy_ogt( ; CHECK-NEXT: [[E:%.*]] = fpext <2 x float> [[X:%.*]] to <2 x double> -; CHECK-NEXT: [[R:%.*]] = fcmp ogt <2 x double> [[E]], +; CHECK-NEXT: [[R:%.*]] = fcmp ogt <2 x double> [[E]], splat (double 1.000000e-01) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %e = fpext <2 x float> %x to <2 x double> @@ -826,7 +826,7 @@ define i1 @lossy_ole(half %x) { define <2 x i1> @lossy_ugt(<2 x float> %x) { ; CHECK-LABEL: @lossy_ugt( ; CHECK-NEXT: [[E:%.*]] = fpext <2 x float> [[X:%.*]] to <2 x double> -; CHECK-NEXT: [[R:%.*]] = fcmp ugt <2 x double> [[E]], +; CHECK-NEXT: [[R:%.*]] = fcmp ugt <2 x double> [[E]], splat (double 1.000000e-01) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %e = fpext <2 x float> %x to <2 x double> @@ -1222,7 +1222,7 @@ define i1 @bitcast_eq0(i32 %x) { define <2 x i1> @bitcast_ne0(<2 x i32> %x) { ; CHECK-LABEL: @bitcast_ne0( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index 12d6e6463de657..06c78a8c6206dc 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -78,7 +78,7 @@ define float @not_exact_but_allow_recip_but_denorm(float %x) { define <2 x float> @exact_inverse_splat(<2 x float> %x) { ; CHECK-LABEL: @exact_inverse_splat( -; CHECK-NEXT: [[DIV:%.*]] = fmul <2 x float> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = fmul <2 x float> [[X:%.*]], splat (float 2.500000e-01) ; CHECK-NEXT: ret <2 x float> [[DIV]] ; %div = fdiv <2 x float> %x, @@ -98,7 +98,7 @@ define @exact_inverse_scalable_splat( % define <2 x float> @not_exact_but_allow_recip_splat(<2 x float> %x) { ; CHECK-LABEL: @not_exact_but_allow_recip_splat( -; CHECK-NEXT: [[DIV:%.*]] = fmul arcp <2 x float> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = fmul arcp <2 x float> [[X:%.*]], splat (float 0x3FD5555560000000) ; CHECK-NEXT: ret <2 x float> [[DIV]] ; %div = fdiv arcp <2 x float> %x, @@ -116,7 +116,7 @@ define <2 x float> @exact_inverse_vec(<2 x float> %x) { define <2 x float> @not_exact_inverse_splat(<2 x float> %x) { ; CHECK-LABEL: @not_exact_inverse_splat( -; CHECK-NEXT: [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <2 x float> [[DIV]] ; %div = fdiv <2 x float> %x, @@ -465,7 +465,7 @@ define float @div_factor_too_strict(float %x, float %y) { define <2 x float> @div_factor_commute(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @div_factor_commute( -; CHECK-NEXT: [[D:%.*]] = fdiv reassoc nnan ninf nsz <2 x float> , [[Y:%.*]] +; CHECK-NEXT: [[D:%.*]] = fdiv reassoc nnan ninf nsz <2 x float> splat (float 1.000000e+00), [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[D]] ; %m = fmul <2 x float> %y, %x @@ -959,7 +959,7 @@ define float @fdiv_nnan_zero_f32(float %x) { define <2 x float> @fdiv_nnan_zero_v2f32(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_zero_v2f32( -; CHECK-NEXT: [[FDIV:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[FDIV:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[FDIV]] ; %fdiv = fdiv nnan <2 x float> %x, zeroinitializer @@ -977,7 +977,7 @@ define float @fdiv_nnan_zero_f32_fmf(float %x) { define <2 x float> @fdiv_nnan_zero_v2f32_fmf(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_zero_v2f32_fmf( -; CHECK-NEXT: [[FDIV:%.*]] = call nnan nsz <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[FDIV:%.*]] = call nnan nsz <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[FDIV]] ; %fdiv = fdiv nnan nsz <2 x float> %x, zeroinitializer @@ -1031,7 +1031,7 @@ define double @test_negative_zero(double %X) { define <2 x double> @test_positive_zero_vector_nsz(<2 x double> %X) { ; CHECK-LABEL: @test_positive_zero_vector_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 0x7FF0000000000000), <2 x double> [[X:%.*]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = fdiv nnan nsz <2 x double> %X, @@ -1040,7 +1040,7 @@ define <2 x double> @test_positive_zero_vector_nsz(<2 x double> %X) { define <2 x double> @test_negative_zero_vector_nsz(<2 x double> %X) { ; CHECK-LABEL: @test_negative_zero_vector_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 0x7FF0000000000000), <2 x double> [[X:%.*]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = fdiv nnan nsz <2 x double> %X, @@ -1049,7 +1049,7 @@ define <2 x double> @test_negative_zero_vector_nsz(<2 x double> %X) { define <2 x double> @test_positive_zero_vector(<2 x double> %X) { ; CHECK-LABEL: @test_positive_zero_vector( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan <2 x double> @llvm.copysign.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call nnan <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 0x7FF0000000000000), <2 x double> [[X:%.*]]) ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = fdiv nnan <2 x double> %X, diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll index 4590bbb7abaeb8..ae0067d41426cf 100644 --- a/llvm/test/Transforms/InstCombine/fma.ll +++ b/llvm/test/Transforms/InstCombine/fma.ll @@ -587,7 +587,7 @@ define <2 x double> @fma_const_fmul_one2(<2 x double> %b) { define <2 x double> @fma_nan_and_const_0(<2 x double> %b) { ; CHECK-LABEL: @fma_nan_and_const_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> , <2 x double> %b) ret <2 x double> %res @@ -595,7 +595,7 @@ define <2 x double> @fma_nan_and_const_0(<2 x double> %b) { define <2 x double> @fma_nan_and_const_1(<2 x double> %b) { ; CHECK-LABEL: @fma_nan_and_const_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> , <2 x double> %b) ret <2 x double> %res @@ -603,7 +603,7 @@ define <2 x double> @fma_nan_and_const_1(<2 x double> %b) { define <2 x double> @fma_nan_and_const_2(<2 x double> %b) { ; CHECK-LABEL: @fma_nan_and_const_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> %b, <2 x double> ) ret <2 x double> %res @@ -611,7 +611,7 @@ define <2 x double> @fma_nan_and_const_2(<2 x double> %b) { define <2 x double> @fma_undef_0(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_undef_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> %b, <2 x double> %c) ret <2 x double> %res @@ -619,7 +619,7 @@ define <2 x double> @fma_undef_0(<2 x double> %b, <2 x double> %c) { define <2 x double> @fma_undef_1(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_undef_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> , <2 x double> %c) ret <2 x double> %res @@ -627,7 +627,7 @@ define <2 x double> @fma_undef_1(<2 x double> %b, <2 x double> %c) { define <2 x double> @fma_undef_2(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_undef_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> ) ret <2 x double> %res @@ -663,14 +663,14 @@ define <2 x double> @fma_partial_undef_2(<2 x double> %b, <2 x double> %c) { define <2 x double> @fma_nan_0(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_nan_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> , <2 x double> %b, <2 x double> %c) ret <2 x double> %res } define <2 x double> @fma_nan_1(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_nan_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> , <2 x double> %c) ret <2 x double> %res @@ -678,7 +678,7 @@ define <2 x double> @fma_nan_1(<2 x double> %b, <2 x double> %c) { define <2 x double> @fma_nan_2(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fma_nan_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> ) ret <2 x double> %res @@ -695,7 +695,7 @@ define <2 x double> @fmuladd_const_fmul(<2 x double> %b) { define <2 x double> @fmuladd_nan_and_const_0(<2 x double> %b) { ; CHECK-LABEL: @fmuladd_nan_and_const_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> , <2 x double> %b) ret <2 x double> %res @@ -703,7 +703,7 @@ define <2 x double> @fmuladd_nan_and_const_0(<2 x double> %b) { define <2 x double> @fmuladd_nan_and_const_1(<2 x double> %b) { ; CHECK-LABEL: @fmuladd_nan_and_const_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> , <2 x double> %b) ret <2 x double> %res @@ -711,7 +711,7 @@ define <2 x double> @fmuladd_nan_and_const_1(<2 x double> %b) { define <2 x double> @fmuladd_nan_and_const_2(<2 x double> %b) { ; CHECK-LABEL: @fmuladd_nan_and_const_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> %b, <2 x double> ) ret <2 x double> %res @@ -719,7 +719,7 @@ define <2 x double> @fmuladd_nan_and_const_2(<2 x double> %b) { define <2 x double> @fmuladd_nan_0(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_nan_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> %b, <2 x double> %c) ret <2 x double> %res @@ -727,7 +727,7 @@ define <2 x double> @fmuladd_nan_0(<2 x double> %b, <2 x double> %c) { define <2 x double> @fmuladd_nan_1(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_nan_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> , <2 x double> %c) ret <2 x double> %res @@ -735,7 +735,7 @@ define <2 x double> @fmuladd_nan_1(<2 x double> %b, <2 x double> %c) { define <2 x double> @fmuladd_undef_0(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_undef_0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> , <2 x double> %b, <2 x double> %c) ret <2 x double> %res @@ -743,7 +743,7 @@ define <2 x double> @fmuladd_undef_0(<2 x double> %b, <2 x double> %c) { define <2 x double> @fmuladd_undef_1(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_undef_1( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> , <2 x double> %c) ret <2 x double> %res @@ -751,7 +751,7 @@ define <2 x double> @fmuladd_undef_1(<2 x double> %b, <2 x double> %c) { define <2 x double> @fmuladd_undef_2(<2 x double> %b, <2 x double> %c) { ; CHECK-LABEL: @fmuladd_undef_2( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF8000000000000) ; %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> ) ret <2 x double> %res diff --git a/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll index 716781564ca70f..3c598f91fad410 100644 --- a/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll @@ -10,7 +10,7 @@ define void @test8(ptr %inout, i1 %c1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ , [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ] ; CHECK-NEXT: br i1 [[C1:%.*]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP0]] = insertelement <4 x float> [[LOCAL_VAR_7_0]], float 0.000000e+00, i64 2 diff --git a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll index 72ac2f18f113a4..2166db8f5c4e59 100644 --- a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll +++ b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll @@ -118,7 +118,7 @@ define <2 x float> @x_add_y_rsqrt_reassociate_extra_use(<2 x float> %x, <2 x flo ; CHECK-LABEL: @x_add_y_rsqrt_reassociate_extra_use( ; CHECK-NEXT: [[ADD:%.*]] = fadd fast <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ADD]]) -; CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x float> , [[SQRT]] +; CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[SQRT]] ; CHECK-NEXT: [[RES:%.*]] = fdiv fast <2 x float> [[ADD]], [[SQRT]] ; CHECK-NEXT: store <2 x float> [[RSQRT]], ptr [[P:%.*]], align 8 ; CHECK-NEXT: ret <2 x float> [[RES]] diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll index 4554b4ed8844de..51b70ef7e98004 100644 --- a/llvm/test/Transforms/InstCombine/fmul.ll +++ b/llvm/test/Transforms/InstCombine/fmul.ll @@ -377,7 +377,7 @@ define void @test8(ptr %inout, i1 %c1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ , [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ] ; CHECK-NEXT: br i1 [[C1:%.*]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP0]] = insertelement <4 x float> [[LOCAL_VAR_7_0]], float 0.000000e+00, i64 2 @@ -821,8 +821,8 @@ define float @fmul_fadd_distribute(float %x) { define <2 x float> @fmul_fadd_distribute_vec(<2 x float> %x) { ; CHECK-LABEL: @fmul_fadd_distribute_vec( -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[X:%.*]], -; CHECK-NEXT: [[T3:%.*]] = fadd reassoc <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[X:%.*]], splat (float 6.000000e+03) +; CHECK-NEXT: [[T3:%.*]] = fadd reassoc <2 x float> [[TMP1]], splat (float 1.200000e+07) ; CHECK-NEXT: ret <2 x float> [[T3]] ; %t1 = fadd reassoc <2 x float> , %x @@ -1164,7 +1164,7 @@ define float @negate_if_false(float %x, i1 %cond) { define <2 x double> @negate_if_true_commute(<2 x double> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_commute( -; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> splat (double 4.200000e+01), [[PX:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = fneg ninf <2 x double> [[X]] ; CHECK-NEXT: [[R:%.*]] = select ninf i1 [[COND:%.*]], <2 x double> [[TMP1]], <2 x double> [[X]] ; CHECK-NEXT: ret <2 x double> [[R]] @@ -1207,8 +1207,8 @@ define float @negate_if_true_extra_use(float %x, i1 %cond) { define <2 x double> @negate_if_true_wrong_constant(<2 x double> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_wrong_constant( -; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> , <2 x double> +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> splat (double 4.200000e+01), [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> , <2 x double> splat (double 1.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fmul <2 x double> [[X]], [[SEL]] ; CHECK-NEXT: ret <2 x double> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/fneg-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-as-int.ll index e3067b0d024614..f8d88b4f238f27 100644 --- a/llvm/test/Transforms/InstCombine/fneg-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fneg-as-int.ll @@ -19,7 +19,7 @@ define <2 x i32> @fneg_as_int_v2f32_noimplicitfloat(<2 x float> %x) noimplicitfl ; CHECK-LABEL: define <2 x i32> @fneg_as_int_v2f32_noimplicitfloat ; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[BC]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[BC]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[XOR]] ; %bc = bitcast <2 x float> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll index 8c3e6958fe083e..8b245bdd792993 100644 --- a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll @@ -17,7 +17,7 @@ define <2 x i32> @fneg_fabs_as_int_v2f32_noimplicitfloat(<2 x float> %x) noimpli ; CHECK-LABEL: define <2 x i32> @fneg_fabs_as_int_v2f32_noimplicitfloat ; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[BC]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[BC]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[OR]] ; %bc = bitcast <2 x float> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll index 3461b19d0d7002..f28262b2a77e04 100644 --- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll +++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll @@ -47,7 +47,7 @@ define <4 x float> @h1(i1 %A, <4 x i32> %B) { ; CHECK-LABEL: @h1( ; CHECK-NEXT: EntryBlock: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float> -; CHECK-NEXT: [[BC:%.*]] = select i1 [[A:%.*]], <4 x float> , <4 x float> [[TMP0]] +; CHECK-NEXT: [[BC:%.*]] = select i1 [[A:%.*]], <4 x float> splat (float 0x36A0000000000000), <4 x float> [[TMP0]] ; CHECK-NEXT: ret <4 x float> [[BC]] ; EntryBlock: diff --git a/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll index 6d5fde364c2353..0ed9927325d929 100644 --- a/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll +++ b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll @@ -144,7 +144,7 @@ define <2 x i1> @fold_cmp_ne_ctpop_c(<2 x i8> %x) { define <2 x i1> @fold_cmp_ne_ctpop_var_fail(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @fold_cmp_ne_ctpop_var_fail( -; CHECK-NEXT: [[NX:%.*]] = xor <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[NX:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[CNT:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[CNT]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -214,7 +214,7 @@ define <2 x i1> @fold_cmp_ugt_ctpop_c(<2 x i8> %x) { define <2 x i1> @fold_cmp_ugt_ctpop_c_out_of_range_fail(<2 x i8> %x) { ; CHECK-LABEL: @fold_cmp_ugt_ctpop_c_out_of_range_fail( -; CHECK-NEXT: [[NX:%.*]] = xor <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[NX:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[CNT:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]]) ; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i8> [[CNT]], ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll index caf38c676e20d7..ff6d9aa4ea522e 100644 --- a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll +++ b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll @@ -63,7 +63,7 @@ define float @ldexp_by_5_if_0_oeq_zero_f32(float %x) { define <2 x float> @ldexp_by_5_if_0_oeq_zero_v2f32(<2 x float> %x) { ; CHECK-LABEL: @ldexp_by_5_if_0_oeq_zero_v2f32( ; CHECK-NEXT: [[X_IS_ZERO:%.*]] = fcmp oeq <2 x float> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[SCALED_X:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[SCALED_X:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> splat (i32 5)) ; CHECK-NEXT: [[SCALED_IF_DENORMAL:%.*]] = select <2 x i1> [[X_IS_ZERO]], <2 x float> [[SCALED_X]], <2 x float> [[X]] ; CHECK-NEXT: ret <2 x float> [[SCALED_IF_DENORMAL]] ; diff --git a/llvm/test/Transforms/InstCombine/fold-select-trunc.ll b/llvm/test/Transforms/InstCombine/fold-select-trunc.ll index 5567d7d5e1fca9..0c7ca0c4fd0059 100644 --- a/llvm/test/Transforms/InstCombine/fold-select-trunc.ll +++ b/llvm/test/Transforms/InstCombine/fold-select-trunc.ll @@ -59,7 +59,7 @@ define i8 @fold_select_trunc_negative(i8 %x, i8 %y) { define <2 x i8> @fold_select_trunc_vector(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @fold_select_trunc_vector( ; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw <2 x i8> [[X:%.*]] to <2 x i1> -; CHECK-NEXT: [[RET:%.*]] = select <2 x i1> [[TRUNC]], <2 x i8> , <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[RET:%.*]] = select <2 x i1> [[TRUNC]], <2 x i8> splat (i8 1), <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %trunc = trunc nuw <2 x i8> %x to <2 x i1> diff --git a/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll b/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll index a5c7cb3306ed08..be02eb6dd33463 100644 --- a/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll +++ b/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll @@ -38,8 +38,8 @@ define i1 @pow2_or_zero_is_negative_commute(i8 %A) { define <2 x i1> @pow2_or_zero_is_negative_vec(<2 x i8> %x) { ; CHECK-LABEL: @pow2_or_zero_is_negative_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128) +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: call void @use_i1_vec(<2 x i1> [[CMP_2]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -53,8 +53,8 @@ define <2 x i1> @pow2_or_zero_is_negative_vec(<2 x i8> %x) { define <2 x i1> @pow2_or_zero_is_negative_vec_commute(<2 x i8> %A) { ; CHECK-LABEL: @pow2_or_zero_is_negative_vec_commute( -; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = mul <2 x i8> , %A ; thwart complexity-based canonicalization @@ -94,8 +94,8 @@ define i1 @pow2_or_zero_is_not_negative_commute(i8 %A) { define <2 x i1> @pow2_or_zero_is_not_negative_vec(<2 x i8> %x) { ; CHECK-LABEL: @pow2_or_zero_is_not_negative_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ne <2 x i8> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[X:%.*]], splat (i8 -128) +; CHECK-NEXT: [[CMP_2:%.*]] = icmp ne <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: call void @use_i1_vec(<2 x i1> [[CMP_2]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -109,8 +109,8 @@ define <2 x i1> @pow2_or_zero_is_not_negative_vec(<2 x i8> %x) { define <2 x i1> @pow2_or_zero_is_not_negative_vec_commute(<2 x i8> %A) { ; CHECK-LABEL: @pow2_or_zero_is_not_negative_vec_commute( -; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = mul <2 x i8> , %A ; thwart complexity-based canonicalization diff --git a/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll b/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll index af580ba57513c2..3bd341ffafb58b 100644 --- a/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll +++ b/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll @@ -41,7 +41,7 @@ define i8 @p0_scalar_not_truly_negatable(i8 %x, i8 %y) { define <4 x i32> @p1_vector_splat(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p1_vector_splat( -; CHECK-NEXT: [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[T0_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; @@ -52,7 +52,7 @@ define <4 x i32> @p1_vector_splat(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @p2_vector_poison(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p2_vector_poison( -; CHECK-NEXT: [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[T0_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; @@ -109,7 +109,7 @@ define i32 @n5_is_not_not(i32 %x, i32 %y) { define <2 x i32> @n5_is_not_not_vec_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @n5_is_not_not_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = xor <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[T1:%.*]] = sub <2 x i32> [[Y:%.*]], [[T0]] ; CHECK-NEXT: ret <2 x i32> [[T1]] ; diff --git a/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll b/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll index 4034759df06048..66970a9d48ddf1 100644 --- a/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll +++ b/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll @@ -253,7 +253,7 @@ define <2 x i1> @f32_fcnan_fcinf_vec(<2 x float> %a) { ; CHECK-LABEL: define <2 x i1> @f32_fcnan_fcinf_vec( ; CHECK-SAME: <2 x float> [[A:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[A]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq <2 x float> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq <2 x float> [[TMP1]], splat (float 0x7FF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %i32 = bitcast <2 x float> %a to <2 x i32> @@ -278,7 +278,7 @@ define <2 x i1> @f32_fcinf_vec(<2 x float> %a) { ; CHECK-LABEL: define <2 x i1> @f32_fcinf_vec( ; CHECK-SAME: <2 x float> [[A:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[A]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[TMP1]], splat (float 0x7FF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %i32 = bitcast <2 x float> %a to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll index f1fba6cb272f9f..79d3360377e506 100644 --- a/llvm/test/Transforms/InstCombine/fsh.ll +++ b/llvm/test/Transforms/InstCombine/fsh.ll @@ -62,7 +62,7 @@ define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) { define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) { ; CHECK-LABEL: @fshl_mask_simplify2( -; CHECK-NEXT: [[MASKEDSH:%.*]] = and <2 x i31> [[SH:%.*]], +; CHECK-NEXT: [[MASKEDSH:%.*]] = and <2 x i31> [[SH:%.*]], splat (i31 32) ; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[MASKEDSH]]) ; CHECK-NEXT: ret <2 x i31> [[R]] ; @@ -138,7 +138,7 @@ define <2 x i32> @fshr_set_but_not_demanded_vec(<2 x i32> %x, <2 x i32> %y, <2 x define <2 x i31> @fshl_set_but_not_demanded_vec(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) { ; CHECK-LABEL: @fshl_set_but_not_demanded_vec( -; CHECK-NEXT: [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], +; CHECK-NEXT: [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], splat (i31 32) ; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[BOGUSBITS]]) ; CHECK-NEXT: ret <2 x i31> [[R]] ; @@ -223,7 +223,7 @@ define i33 @fshr_op1_zero(i33 %x) { define <2 x i31> @fshl_op0_zero_splat_vec(<2 x i31> %x) { ; CHECK-LABEL: @fshl_op0_zero_splat_vec( -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i31> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i31> [[X:%.*]], splat (i31 24) ; CHECK-NEXT: ret <2 x i31> [[R]] ; %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> ) @@ -232,7 +232,7 @@ define <2 x i31> @fshl_op0_zero_splat_vec(<2 x i31> %x) { define <2 x i31> @fshl_op1_undef_splat_vec(<2 x i31> %x) { ; CHECK-LABEL: @fshl_op1_undef_splat_vec( -; CHECK-NEXT: [[R:%.*]] = shl <2 x i31> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl <2 x i31> [[X:%.*]], splat (i31 7) ; CHECK-NEXT: ret <2 x i31> [[R]] ; %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> ) @@ -241,7 +241,7 @@ define <2 x i31> @fshl_op1_undef_splat_vec(<2 x i31> %x) { define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @fshr_op0_undef_splat_vec( -; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> ) @@ -250,7 +250,7 @@ define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) { define <2 x i32> @fshr_op1_zero_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @fshr_op1_zero_splat_vec( -; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 25) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> ) @@ -341,7 +341,7 @@ define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) { define <2 x i31> @fshl_only_op1_demanded_vec_splat(<2 x i31> %x, <2 x i31> %y) { ; CHECK-LABEL: @fshl_only_op1_demanded_vec_splat( -; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], +; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], splat (i31 24) ; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], ; CHECK-NEXT: ret <2 x i31> [[R]] ; @@ -466,7 +466,7 @@ define i33 @rotr_common_demanded(i33 %a0) { define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) { ; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat( -; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], +; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], splat (i31 24) ; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], ; CHECK-NEXT: ret <2 x i31> [[R]] ; @@ -486,7 +486,7 @@ define i32 @rotl_constant_shift_amount(i32 %x) { define <2 x i31> @rotl_constant_shift_amount_vec(<2 x i31> %x) { ; CHECK-LABEL: @rotl_constant_shift_amount_vec( -; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[X]], <2 x i31> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[X]], <2 x i31> splat (i31 1)) ; CHECK-NEXT: ret <2 x i31> [[R]] ; %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %x, <2 x i31> ) @@ -852,7 +852,7 @@ entry: define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) { ; CHECK-LABEL: @fshr_mask_args_same_vector( -; CHECK-NEXT: [[T3:%.*]] = shl <2 x i31> [[A:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shl <2 x i31> [[A:%.*]], splat (i31 10) ; CHECK-NEXT: ret <2 x i31> [[T3]] ; %t1 = and <2 x i31> %a, @@ -864,7 +864,7 @@ define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) { define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @fshr_mask_args_same_vector2( ; CHECK-NEXT: [[T1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[T3:%.*]] = lshr exact <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = lshr exact <2 x i32> [[T1]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t1 = and <2 x i32> %a, @@ -875,7 +875,7 @@ define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) { define <2 x i31> @fshr_mask_args_same_vector3_different_but_still_prunable(<2 x i31> %a) { ; CHECK-LABEL: @fshr_mask_args_same_vector3_different_but_still_prunable( -; CHECK-NEXT: [[T1:%.*]] = and <2 x i31> [[A:%.*]], +; CHECK-NEXT: [[T1:%.*]] = and <2 x i31> [[A:%.*]], splat (i31 1000) ; CHECK-NEXT: [[T3:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[A]], <2 x i31> [[T1]], <2 x i31> ) ; CHECK-NEXT: ret <2 x i31> [[T3]] ; diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll index fa0d59b2269983..0e5f0469264c7c 100644 --- a/llvm/test/Transforms/InstCombine/funnel.ll +++ b/llvm/test/Transforms/InstCombine/funnel.ll @@ -34,7 +34,7 @@ define i42 @fshr_i42_constant(i42 %x, i42 %y) { define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -45,7 +45,7 @@ define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { define <2 x i16> @fshl_v2i16_constant_splat_poison0(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat_poison0( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -56,7 +56,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_poison0(<2 x i16> %x, <2 x i16> %y) define <2 x i16> @fshl_v2i16_constant_splat_poison1(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: @fshl_v2i16_constant_splat_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -69,7 +69,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_poison1(<2 x i16> %x, <2 x i16> %y) define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> splat (i17 5)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shr = lshr <2 x i17> %x, @@ -80,7 +80,7 @@ define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) { define <2 x i17> @fshr_v2i17_constant_splat_poison0(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat_poison0( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> splat (i17 5)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shr = lshr <2 x i17> %x, @@ -91,7 +91,7 @@ define <2 x i17> @fshr_v2i17_constant_splat_poison0(<2 x i17> %x, <2 x i17> %y) define <2 x i17> @fshr_v2i17_constant_splat_poison1(<2 x i17> %x, <2 x i17> %y) { ; CHECK-LABEL: @fshr_v2i17_constant_splat_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> splat (i17 5)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shr = lshr <2 x i17> %x, @@ -481,11 +481,11 @@ define i32 @fshl_concat_unknown_source(i32 %zext.x, i32 %zext.y, ptr %addr) { define <2 x i32> @fshl_concat_vector(<2 x i8> %x, <2 x i24> %y, ptr %addr) { ; CHECK-LABEL: @fshl_concat_vector( ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], +; CHECK-NEXT: [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], splat (i32 24) ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext <2 x i24> [[Y:%.*]] to <2 x i32> ; CHECK-NEXT: [[XY:%.*]] = or disjoint <2 x i32> [[SLX]], [[ZEXT_Y]] ; CHECK-NEXT: store <2 x i32> [[XY]], ptr [[ADDR:%.*]], align 4 -; CHECK-NEXT: [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> ) +; CHECK-NEXT: [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> splat (i32 8)) ; CHECK-NEXT: ret <2 x i32> [[YX]] ; %zext.x = zext <2 x i8> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll index 1cb7cf99bea325..5f22a354570157 100644 --- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -97,8 +97,8 @@ define void @PR37005(ptr %base, ptr %in) { ; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8, ptr [[E1]], i64 48 ; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds ptr, ptr [[E2]], <2 x i64> ; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x ptr> [[E4]] to <2 x i64> -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], -; CHECK-NEXT: [[SL1:%.*]] = and <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], splat (i64 14) +; CHECK-NEXT: [[SL1:%.*]] = and <2 x i64> [[TMP0]], splat (i64 1125899906842496) ; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, ptr [[BASE:%.*]], <2 x i64> [[SL1]] ; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x ptr> [[E5]], i64 80 ; CHECK-NEXT: call void @blackhole(<2 x ptr> [[E6]]) @@ -159,8 +159,8 @@ define void @PR37005_3(<2 x ptr> %base, ptr %in) { ; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8, ptr [[E1]], i64 48 ; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds ptr, ptr [[E2]], <2 x i64> ; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x ptr> [[E4]] to <2 x i64> -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], -; CHECK-NEXT: [[SL1:%.*]] = and <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], splat (i64 14) +; CHECK-NEXT: [[SL1:%.*]] = and <2 x i64> [[TMP0]], splat (i64 1125899906842496) ; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, <2 x ptr> [[BASE:%.*]], <2 x i64> [[SL1]] ; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x ptr> [[E5]], i64 80 ; CHECK-NEXT: call void @blackhole(<2 x ptr> [[E6]]) @@ -189,7 +189,7 @@ define void @PR51485(<2 x i64> %v) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[V:%.*]], +; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[V:%.*]], splat (i64 7) ; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, ptr @PR51485, <2 x i64> [[SL1]] ; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x ptr> [[E5]], i64 80 ; CHECK-NEXT: call void @blackhole(<2 x ptr> [[E6]]) diff --git a/llvm/test/Transforms/InstCombine/gep-custom-dl.ll b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll index e8eaf4e24f7e42..c1c11c1acc7bbc 100644 --- a/llvm/test/Transforms/InstCombine/gep-custom-dl.ll +++ b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll @@ -99,7 +99,7 @@ define i1 @test5(ptr %x, ptr %y) { define <2 x i1> @test6(<2 x i32> %X, <2 x ptr> %P) nounwind { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = getelementptr inbounds %S, <2 x ptr> %P, <2 x i32> zeroinitializer, <2 x i32> , <2 x i32> %X @@ -111,7 +111,7 @@ define <2 x i1> @test6(<2 x i32> %X, <2 x ptr> %P) nounwind { ; Same as above, but indices scalarized. define <2 x i1> @test6b(<2 x i32> %X, <2 x ptr> %P) nounwind { ; CHECK-LABEL: @test6b( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = getelementptr inbounds %S, <2 x ptr> %P, i32 0, i32 1, <2 x i32> %X diff --git a/llvm/test/Transforms/InstCombine/gep-vector.ll b/llvm/test/Transforms/InstCombine/gep-vector.ll index 4d20323b789674..d8a65b69aceff7 100644 --- a/llvm/test/Transforms/InstCombine/gep-vector.ll +++ b/llvm/test/Transforms/InstCombine/gep-vector.ll @@ -5,7 +5,7 @@ define <2 x ptr> @vectorindex1() { ; CHECK-LABEL: @vectorindex1( -; CHECK-NEXT: ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> , <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> , <2 x i64> splat (i64 8192)) ; %1 = getelementptr inbounds [64 x [8192 x i8]], ptr @block, i64 0, <2 x i64> , i64 8192 ret <2 x ptr> %1 @@ -13,7 +13,7 @@ define <2 x ptr> @vectorindex1() { define <2 x ptr> @vectorindex2() { ; CHECK-LABEL: @vectorindex2( -; CHECK-NEXT: ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> , <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> splat (i64 1), <2 x i64> ) ; %1 = getelementptr inbounds [64 x [8192 x i8]], ptr @block, i64 0, i64 1, <2 x i64> ret <2 x ptr> %1 diff --git a/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll b/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll index 40caa57891369f..81a148b745906e 100644 --- a/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll +++ b/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll @@ -32,8 +32,8 @@ define i16 @t1(i16 %x) { ; Vectors define <2 x i8> @t2_vec(<2 x i8> %x) { ; CHECK-LABEL: @t2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = lshr <2 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> splat (i8 7), [[X:%.*]] +; CHECK-NEXT: [[MASK:%.*]] = lshr <2 x i8> splat (i8 -1), [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[MASK]] ; %bitmask = shl <2 x i8> , %x @@ -43,8 +43,8 @@ define <2 x i8> @t2_vec(<2 x i8> %x) { } define <3 x i8> @t3_vec_poison0(<3 x i8> %x) { ; CHECK-LABEL: @t3_vec_poison0( -; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> splat (i8 7), [[X:%.*]] +; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> splat (i8 -1), [[TMP1]] ; CHECK-NEXT: ret <3 x i8> [[MASK]] ; %bitmask = shl <3 x i8> , %x @@ -54,8 +54,8 @@ define <3 x i8> @t3_vec_poison0(<3 x i8> %x) { } define <3 x i8> @t4_vec_poison1(<3 x i8> %x) { ; CHECK-LABEL: @t4_vec_poison1( -; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> splat (i8 7), [[X:%.*]] +; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> splat (i8 -1), [[TMP1]] ; CHECK-NEXT: ret <3 x i8> [[MASK]] ; %bitmask = shl <3 x i8> , %x @@ -65,8 +65,8 @@ define <3 x i8> @t4_vec_poison1(<3 x i8> %x) { } define <3 x i8> @t5_vec_poison2(<3 x i8> %x) { ; CHECK-LABEL: @t5_vec_poison2( -; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> splat (i8 7), [[X:%.*]] +; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> splat (i8 -1), [[TMP1]] ; CHECK-NEXT: ret <3 x i8> [[MASK]] ; %bitmask = shl <3 x i8> , %x diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index 6a5f77e8cf6e08..8f28049cf5f581 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -222,7 +222,7 @@ define i1 @test13(i64 %X, ptr %P) { define <2 x i1> @test13_vector(<2 x i64> %X, <2 x ptr> %P) nounwind { ; CHECK-LABEL: @test13_vector( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = getelementptr inbounds %S, <2 x ptr> %P, <2 x i64> zeroinitializer, <2 x i32> , <2 x i64> %X @@ -250,7 +250,7 @@ define <2 x i1> @test13_fixed_fixed(i64 %X, ptr %P, <2 x i64> %y) nounwind { ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], ; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], splat (i64 4) ; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -325,7 +325,7 @@ define i1 @test13_as1(i16 %X, ptr addrspace(1) %P) { define <2 x i1> @test13_vector_as1(<2 x i16> %X, <2 x ptr addrspace(1)> %P) { ; CHECK-LABEL: @test13_vector_as1( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i16> [[X:%.*]], splat (i16 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = getelementptr inbounds %S, <2 x ptr addrspace(1)> %P, <2 x i16> , <2 x i32> , <2 x i16> %X diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll index 3ebab115f65439..5786c30fc7423a 100644 --- a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll +++ b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll @@ -48,7 +48,7 @@ define i32 @t3_exact(i64 %x) { define <2 x i32> @t4(<2 x i64> %x) { ; CHECK-LABEL: @t4( -; CHECK-NEXT: [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], splat (i64 63) ; CHECK-NEXT: [[T1_NEG:%.*]] = trunc nsw <2 x i64> [[T0_NEG]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T1_NEG]] ; diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask.ll index 53a7fb4461c57e..b4bad9e6bd8e1f 100644 --- a/llvm/test/Transforms/InstCombine/high-bit-signmask.ll +++ b/llvm/test/Transforms/InstCombine/high-bit-signmask.ll @@ -40,7 +40,7 @@ define i64 @t3_exact(i64 %x) { define <2 x i64> @t4(<2 x i64> %x) { ; CHECK-LABEL: @t4( -; CHECK-NEXT: [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], splat (i64 63) ; CHECK-NEXT: ret <2 x i64> [[T0_NEG]] ; %t0 = lshr <2 x i64> %x, diff --git a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll index ab37becf0e3117..ff03b3d5b678c6 100644 --- a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll +++ b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll @@ -26,7 +26,7 @@ define i8 @t0(i8 %x) { define <2 x i8> @t1_vec(<2 x i8> %x) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -43) ; CHECK-NEXT: [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[NEGBIAS]] ; diff --git a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll index 6049997db4d1ae..d4aa8b5dbf5055 100644 --- a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll +++ b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll @@ -44,7 +44,7 @@ define i8 @t1_commutative(i8 %y) { define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[NEGBIAS]] @@ -57,7 +57,7 @@ define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @t3_vec_poison(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t3_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[NEGBIAS]] diff --git a/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll b/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll index 2217666f0f49a2..f4e72c073e8a5c 100644 --- a/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll +++ b/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll @@ -34,7 +34,7 @@ define i8 @t1(i8 %x, i8 %y) { define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t2_vec( ; CHECK-NEXT: [[NOT_X_NOT:%.*]] = ashr <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], +; CHECK-NEXT: [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i8> [[ASHR]] ; %not_x = xor <2 x i8> %x, @@ -45,7 +45,7 @@ define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @t3_vec_poison(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t3_vec_poison( ; CHECK-NEXT: [[NOT_X_NOT:%.*]] = ashr <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], +; CHECK-NEXT: [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i8> [[ASHR]] ; %not_x = xor <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll b/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll index f92b10b0ccb371..39a76ae11ceef3 100644 --- a/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll +++ b/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll @@ -20,7 +20,7 @@ define i8 @t0_scalar(i8 %x, i8 %y) { define <2 x i8> @t1_splatvec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_splatvec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[TMP1]], splat (i8 42) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %i0 = xor <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index 0c141d4b8e73aa..dd8e9c1a45ea10 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -514,7 +514,7 @@ bb: define <2 x i1> @cvt_icmp_2_sext_plus_zext_ne_vec(<2 x i1> %arg, <2 x i1> %arg1) { ; CHECK-LABEL: @cvt_icmp_2_sext_plus_zext_ne_vec( ; CHECK-NEXT: bb: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; bb: %i = sext <2 x i1> %arg to <2 x i32> @@ -1740,7 +1740,7 @@ define i1 @test1(i32 %a) { define <2 x i1> @test1vec(<2 x i32> %a) { ; CHECK-LABEL: @test1vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 -5) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add <2 x i32> %a, @@ -1760,7 +1760,7 @@ define i1 @test2(i32 %a) { define <2 x i1> @test2vec(<2 x i32> %a) { ; CHECK-LABEL: @test2vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = sub <2 x i32> %a, @@ -1780,7 +1780,7 @@ define i1 @test3(i32 %a) { define <2 x i1> @test3vec(<2 x i32> %a) { ; CHECK-LABEL: @test3vec( -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 2147483643) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add <2 x i32> %a, @@ -1818,7 +1818,7 @@ define { i32, i1 } @test4multiuse(i32 %a) { define <2 x i1> @test4vec(<2 x i32> %a) { ; CHECK-LABEL: @test4vec( -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], splat (i32 -4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add <2 x i32> %a, @@ -1841,7 +1841,7 @@ define i1 @nsw_slt1(i8 %a) { define <2 x i1> @nsw_slt1_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @nsw_slt1_splat_vec( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add nsw <2 x i8> %a, @@ -1864,7 +1864,7 @@ define i1 @nsw_slt2(i8 %a) { define <2 x i1> @nsw_slt2_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @nsw_slt2_splat_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add nsw <2 x i8> %a, @@ -1913,7 +1913,7 @@ define i1 @nsw_sgt1(i8 %a) { define <2 x i1> @nsw_sgt1_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @nsw_sgt1_splat_vec( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add nsw <2 x i8> %a, @@ -1933,7 +1933,7 @@ define i1 @nsw_sgt2(i8 %a) { define <2 x i1> @nsw_sgt2_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @nsw_sgt2_splat_vec( -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -126) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = add nsw <2 x i8> %a, @@ -1958,7 +1958,7 @@ define i1 @slt_zero_add_nsw(i32 %a) { define <2 x i1> @slt_zero_add_nsw_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @slt_zero_add_nsw_splat_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = add nsw <2 x i8> %a, @@ -2117,7 +2117,7 @@ define <2 x i1> @op_ugt_sum_vec_commute2(<2 x i8> %p1, <2 x i8> %p2) { ; CHECK-LABEL: @op_ugt_sum_vec_commute2( ; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[P1:%.*]] ; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i8> , [[P2:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], splat (i8 -1) ; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[Y]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -2149,7 +2149,7 @@ define <2 x i1> @sum_ult_op_vec_commute1(<2 x i8> %p1, <2 x i8> %p2) { ; CHECK-LABEL: @sum_ult_op_vec_commute1( ; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[P1:%.*]] ; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i8> , [[P2:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], splat (i8 -1) ; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[Y]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -2367,7 +2367,7 @@ define <2 x i1> @icmp_eq_add_non_splat(<2 x i32> %a) { define <2 x i1> @icmp_eq_add_undef2(<2 x i32> %a) { ; CHECK-LABEL: @icmp_eq_add_undef2( -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 5) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -2378,7 +2378,7 @@ define <2 x i1> @icmp_eq_add_undef2(<2 x i32> %a) { define <2 x i1> @icmp_eq_add_non_splat2(<2 x i32> %a) { ; CHECK-LABEL: @icmp_eq_add_non_splat2( -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 5) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -2496,7 +2496,7 @@ define i1 @ugt_offset_use(i32 %a) { define <2 x i1> @ugt_offset_splat(<2 x i5> %a) { ; CHECK-LABEL: @ugt_offset_splat( -; CHECK-NEXT: [[OV:%.*]] = icmp slt <2 x i5> [[A:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp slt <2 x i5> [[A:%.*]], splat (i5 -9) ; CHECK-NEXT: ret <2 x i1> [[OV]] ; %t = add <2 x i5> %a, @@ -2552,7 +2552,7 @@ define i1 @ult_offset_use(i32 %a) { define <2 x i1> @ult_offset_splat(<2 x i5> %a) { ; CHECK-LABEL: @ult_offset_splat( -; CHECK-NEXT: [[OV:%.*]] = icmp sgt <2 x i5> [[A:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp sgt <2 x i5> [[A:%.*]], splat (i5 -10) ; CHECK-NEXT: ret <2 x i1> [[OV]] ; %t = add <2 x i5> %a, @@ -2608,7 +2608,7 @@ define i1 @sgt_offset_use(i32 %a) { define <2 x i1> @sgt_offset_splat(<2 x i5> %a) { ; CHECK-LABEL: @sgt_offset_splat( -; CHECK-NEXT: [[OV:%.*]] = icmp ult <2 x i5> [[A:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp ult <2 x i5> [[A:%.*]], splat (i5 7) ; CHECK-NEXT: ret <2 x i1> [[OV]] ; %t = add <2 x i5> %a, @@ -2664,7 +2664,7 @@ define i1 @slt_offset_use(i32 %a) { define <2 x i1> @slt_offset_splat(<2 x i5> %a) { ; CHECK-LABEL: @slt_offset_splat( -; CHECK-NEXT: [[OV:%.*]] = icmp ugt <2 x i5> [[A:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <2 x i5> [[A:%.*]], splat (i5 6) ; CHECK-NEXT: ret <2 x i1> [[OV]] ; %t = add <2 x i5> %a, @@ -2976,8 +2976,8 @@ define i1 @icmp_dec_nonzero(i16 %x) { define <2 x i1> @icmp_dec_nonzero_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_dec_nonzero_vec( -; CHECK-NEXT: [[O:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[I:%.*]] = add nsw <2 x i32> [[O]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 8) +; CHECK-NEXT: [[I:%.*]] = add nsw <2 x i32> [[O]], splat (i32 -1) ; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[I]], ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -3068,8 +3068,8 @@ define i1 @ult_add_C2_neg_C_pow2(i8 %x) { define <2 x i1> @ult_add_C2_pow2_C_neg_vec(<2 x i8> %x) { ; CHECK-LABEL: @ult_add_C2_pow2_C_neg_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -32) +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 -64) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = add <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll index d092363309fec0..78f1bc7d7379d4 100644 --- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll +++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll @@ -18,7 +18,7 @@ define i32 @icmp_eq_and_pow2_shl1(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_shl1_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_shl1_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 4) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -44,7 +44,7 @@ define i32 @icmp_ne_and_pow2_shl1(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_shl1_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_shl1_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 4) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -70,7 +70,7 @@ define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 2) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -96,7 +96,7 @@ define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 2) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -162,7 +162,7 @@ define i32 @icmp_eq_and_pow2_minus1_shl1(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_minus1_shl1_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl1_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], splat (i32 3) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -188,7 +188,7 @@ define i32 @icmp_ne_and_pow2_minus1_shl1(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_minus1_shl1_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl1_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], splat (i32 4) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -214,7 +214,7 @@ define i32 @icmp_eq_and_pow2_minus1_shl_pow2(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_minus1_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl_pow2_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], splat (i32 1) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -240,7 +240,7 @@ define i32 @icmp_ne_and_pow2_minus1_shl_pow2(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_minus1_shl_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl_pow2_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], splat (i32 2) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -293,7 +293,7 @@ define i32 @icmp_eq_and1_lshr_pow2(i32 %0) { define <2 x i32> @icmp_eq_and1_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and1_lshr_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 3) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -319,7 +319,7 @@ define i32 @icmp_ne_and1_lshr_pow2(i32 %0) { define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and1_lshr_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 1) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -356,7 +356,7 @@ define i32 @icmp_eq_and_pow2_lshr_pow2_case2(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 1) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -393,7 +393,7 @@ define i32 @icmp_ne_and_pow2_lshr_pow2_case2(i32 %0) { define <2 x i32> @icmp_ne_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 1) ; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CONV]] ; @@ -686,7 +686,7 @@ define i1 @test_const_shr_and_2_ne_0_negative(i32 %b) { define <8 x i1> @test_const_shr_and_1_ne_0_v8i8_splat_negative(<8 x i8> %b) { ; CHECK-LABEL: @test_const_shr_and_1_ne_0_v8i8_splat_negative( -; CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i8> , [[B:%.*]] +; CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i8> splat (i8 42), [[B:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = trunc <8 x i8> [[SHR]] to <8 x i1> ; CHECK-NEXT: ret <8 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll index f667e1aa105d04..264e951b32e49b 100644 --- a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll @@ -16,8 +16,8 @@ define i1 @is_rem2_neg_i8(i8 %x) { define <2 x i1> @is_rem2_pos_v2i8(<2 x i8> %x) { ; CHECK-LABEL: @is_rem2_pos_v2i8( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -127) +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %s = srem <2 x i8> %x, @@ -210,8 +210,8 @@ define i1 @udiv_eq_umax(i8 %x, i8 %y) { define <2 x i1> @udiv_ne_umax(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @udiv_ne_umax( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], splat (i5 -1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], splat (i5 1) ; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -299,8 +299,8 @@ define i1 @sdiv_eq_smin(i8 %x, i8 %y) { define <2 x i1> @sdiv_ne_smin(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @sdiv_ne_smin( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i5> [[X:%.*]], splat (i5 -16) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i5> [[Y:%.*]], splat (i5 1) ; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -434,7 +434,7 @@ define i1 @lshr_x_by_const_cmp_x(i32 %x) { define <4 x i1> @lshr_by_const_cmp_sle_value(<4 x i32> %x) { ; CHECK-LABEL: @lshr_by_const_cmp_sle_value( -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %v = lshr <4 x i32> %x, @@ -444,7 +444,7 @@ define <4 x i1> @lshr_by_const_cmp_sle_value(<4 x i32> %x) { define <4 x i1> @lshr_by_const_cmp_sle_value_non_splat(<4 x i32> %x) { ; CHECK-LABEL: @lshr_by_const_cmp_sle_value_non_splat( -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %v = lshr <4 x i32> %x, @@ -455,7 +455,7 @@ define <4 x i1> @lshr_by_const_cmp_sle_value_non_splat(<4 x i32> %x) { define <4 x i1> @ashr_by_const_cmp_sge_value_non_splat(<4 x i32> %x) { ; CHECK-LABEL: @ashr_by_const_cmp_sge_value_non_splat( -; CHECK-NEXT: [[R:%.*]] = icmp slt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %v = ashr <4 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-fsh.ll b/llvm/test/Transforms/InstCombine/icmp-fsh.ll index 8154f312dc5d40..cafc171301f7d3 100644 --- a/llvm/test/Transforms/InstCombine/icmp-fsh.ll +++ b/llvm/test/Transforms/InstCombine/icmp-fsh.ll @@ -45,7 +45,7 @@ define i1 @rotl_eq_n1(i8 %x, i8 %y) { define <2 x i1> @rotl_ne_n1(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @rotl_ne_n1( -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[X:%.*]], splat (i5 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rot = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5>%x, <2 x i5> %x, <2 x i5> %y) diff --git a/llvm/test/Transforms/InstCombine/icmp-logical.ll b/llvm/test/Transforms/InstCombine/icmp-logical.ll index 4690ead483a5ba..50feb51092fd9e 100644 --- a/llvm/test/Transforms/InstCombine/icmp-logical.ll +++ b/llvm/test/Transforms/InstCombine/icmp-logical.ll @@ -17,7 +17,7 @@ define i1 @masked_and_notallzeroes(i32 %A) { define <2 x i1> @masked_and_notallzeroes_splat(<2 x i32> %A) { ; CHECK-LABEL: @masked_and_notallzeroes_splat( -; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 7) ; CHECK-NEXT: [[TST1:%.*]] = icmp ne <2 x i32> [[MASK1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TST1]] ; @@ -371,7 +371,7 @@ define i1 @fold_mask_cmps_to_true_logical(i32 %x) { define <2 x i1> @nomask_splat_and_B_allones(<2 x i32> %A) { ; CHECK-LABEL: @nomask_splat_and_B_allones( -; CHECK-NEXT: [[RES:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[RES:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 -268435457) ; CHECK-NEXT: ret <2 x i1> [[RES]] ; %tst1 = icmp slt <2 x i32> %A, @@ -383,8 +383,8 @@ define <2 x i1> @nomask_splat_and_B_allones(<2 x i32> %A) { define <2 x i1> @nomask_splat_and_B_mixed(<2 x i32> %A) { ; CHECK-LABEL: @nomask_splat_and_B_mixed( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -268435456) +; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 1879048192) ; CHECK-NEXT: ret <2 x i1> [[RES]] ; %tst1 = icmp sgt <2 x i32> %A, @@ -476,8 +476,8 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) { define <2 x i1> @masked_icmps_mask_notallzeros_bmask_mixed_1_vector(<2 x i32> %x) { ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1_vector( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T5:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 15) +; CHECK-NEXT: [[T5:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 9) ; CHECK-NEXT: ret <2 x i1> [[T5]] ; %t1 = and <2 x i32> %x, @@ -1778,8 +1778,8 @@ define i1 @masked_icmps_bmask_notmixed_or(i32 %A) { define <2 x i1> @masked_icmps_bmask_notmixed_or_vec(<2 x i8> %A) { ; CHECK-LABEL: @masked_icmps_bmask_notmixed_or_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 15) +; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[MASK1]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[RES]] ; %mask1 = and <2 x i8> %A, ; 0x0f @@ -1792,9 +1792,9 @@ define <2 x i1> @masked_icmps_bmask_notmixed_or_vec(<2 x i8> %A) { define <2 x i1> @masked_icmps_bmask_notmixed_or_vec_poison1(<2 x i8> %A) { ; CHECK-LABEL: @masked_icmps_bmask_notmixed_or_vec_poison1( -; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 15) ; CHECK-NEXT: [[TST1:%.*]] = icmp eq <2 x i8> [[MASK1]], -; CHECK-NEXT: [[TST2:%.*]] = icmp eq <2 x i8> [[A]], +; CHECK-NEXT: [[TST2:%.*]] = icmp eq <2 x i8> [[A]], splat (i8 -13) ; CHECK-NEXT: [[RES:%.*]] = or <2 x i1> [[TST1]], [[TST2]] ; CHECK-NEXT: ret <2 x i1> [[RES]] ; @@ -1808,8 +1808,8 @@ define <2 x i1> @masked_icmps_bmask_notmixed_or_vec_poison1(<2 x i8> %A) { define <2 x i1> @masked_icmps_bmask_notmixed_or_vec_poison2(<2 x i8> %A) { ; CHECK-LABEL: @masked_icmps_bmask_notmixed_or_vec_poison2( -; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[TST1:%.*]] = icmp eq <2 x i8> [[MASK1]], +; CHECK-NEXT: [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 15) +; CHECK-NEXT: [[TST1:%.*]] = icmp eq <2 x i8> [[MASK1]], splat (i8 3) ; CHECK-NEXT: [[TST2:%.*]] = icmp eq <2 x i8> [[A]], ; CHECK-NEXT: [[RES:%.*]] = or <2 x i1> [[TST1]], [[TST2]] ; CHECK-NEXT: ret <2 x i1> [[RES]] diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll index 7e7f087ca7112c..7fa75184c1cf3b 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll @@ -158,7 +158,7 @@ define i32 @pr40493_neg3(i32 %area) { define <4 x i1> @pr40493_vec1(<4 x i32> %area) { ; CHECK-LABEL: @pr40493_vec1( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[AREA:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[AREA:%.*]], splat (i32 1) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -171,7 +171,7 @@ define <4 x i1> @pr40493_vec1(<4 x i32> %area) { define <4 x i1> @pr40493_vec2(<4 x i32> %area) { ; CHECK-LABEL: @pr40493_vec2( ; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[AREA:%.*]], -; CHECK-NEXT: [[REM:%.*]] = and <4 x i32> [[MUL]], +; CHECK-NEXT: [[REM:%.*]] = and <4 x i32> [[MUL]], splat (i32 4) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[REM]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -183,7 +183,7 @@ define <4 x i1> @pr40493_vec2(<4 x i32> %area) { define <4 x i1> @pr40493_vec3(<4 x i32> %area) { ; CHECK-LABEL: @pr40493_vec3( -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[AREA:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[AREA:%.*]], splat (i32 12) ; CHECK-NEXT: [[REM:%.*]] = and <4 x i32> [[MUL]], ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[REM]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll index c4543c9deef388..c9f9b6d809e8a5 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll @@ -257,7 +257,7 @@ define i1 @eq_nsw_rem_zero(i8 %x) { define <2 x i1> @ne_nsw_rem_zero(<2 x i8> %x) { ; CHECK-LABEL: @ne_nsw_rem_zero( -; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[X:%.*]], splat (i8 -6) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = mul nsw <2 x i8> %x, @@ -270,7 +270,7 @@ define <2 x i1> @ne_nsw_rem_zero(<2 x i8> %x) { define <2 x i1> @ne_nsw_rem_zero_undef1(<2 x i8> %x) { ; CHECK-LABEL: @ne_nsw_rem_zero_undef1( ; CHECK-NEXT: [[A:%.*]] = mul nsw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[A]], +; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[A]], splat (i8 -30) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = mul nsw <2 x i8> %x, @@ -282,7 +282,7 @@ define <2 x i1> @ne_nsw_rem_zero_undef1(<2 x i8> %x) { define <2 x i1> @ne_nsw_rem_zero_undef2(<2 x i8> %x) { ; CHECK-LABEL: @ne_nsw_rem_zero_undef2( -; CHECK-NEXT: [[A:%.*]] = mul nsw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = mul nsw <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i8> [[A]], ; CHECK-NEXT: ret <2 x i1> [[B]] ; @@ -328,7 +328,7 @@ define i1 @ne_nsw_rem_nz(i8 %x) { define <2 x i1> @eq_nuw_rem_zero(<2 x i8> %x) { ; CHECK-LABEL: @eq_nuw_rem_zero( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = mul nuw <2 x i8> %x, @@ -341,7 +341,7 @@ define <2 x i1> @eq_nuw_rem_zero(<2 x i8> %x) { define <2 x i1> @eq_nuw_rem_zero_undef1(<2 x i8> %x) { ; CHECK-LABEL: @eq_nuw_rem_zero_undef1( ; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[A]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[A]], splat (i8 20) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = mul nuw <2 x i8> %x, @@ -353,7 +353,7 @@ define <2 x i1> @eq_nuw_rem_zero_undef1(<2 x i8> %x) { define <2 x i1> @eq_nuw_rem_zero_undef2(<2 x i8> %x) { ; CHECK-LABEL: @eq_nuw_rem_zero_undef2( -; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i8> [[A]], ; CHECK-NEXT: ret <2 x i1> [[B]] ; @@ -783,7 +783,7 @@ define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @eq_mul_constants_with_tz_splat( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1073741823) ; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -1042,7 +1042,7 @@ define i1 @mul_evenC_ne(i8 %v) { define <2 x i1> @mul_oddC_ne_vec(<2 x i8> %v) { ; CHECK-LABEL: @mul_oddC_ne_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[V:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %mul = mul <2 x i8> %v, @@ -1110,7 +1110,7 @@ define i1 @mul_xy_z_assumeodd_eq(i8 %x, i8 %y, i8 %z) { define <2 x i1> @reused_mul_nsw_xy_z_setnonzero_vec_ne(<2 x i8> %x, <2 x i8> %y, <2 x i8> %zi) { ; CHECK-LABEL: @reused_mul_nsw_xy_z_setnonzero_vec_ne( -; CHECK-NEXT: [[Z:%.*]] = or <2 x i8> [[ZI:%.*]], +; CHECK-NEXT: [[Z:%.*]] = or <2 x i8> [[ZI:%.*]], splat (i8 4) ; CHECK-NEXT: [[MULY:%.*]] = mul nsw <2 x i8> [[Y:%.*]], [[Z]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[Y]], [[X:%.*]] ; CHECK-NEXT: call void @usev2xi8(<2 x i8> [[MULY]]) diff --git a/llvm/test/Transforms/InstCombine/icmp-not-bool-constant.ll b/llvm/test/Transforms/InstCombine/icmp-not-bool-constant.ll index 439c1d6af6654c..9afe2a93c56f00 100644 --- a/llvm/test/Transforms/InstCombine/icmp-not-bool-constant.ll +++ b/llvm/test/Transforms/InstCombine/icmp-not-bool-constant.ll @@ -7,7 +7,7 @@ define <2 x i1> @eq_t_not(<2 x i1> %a) { ; CHECK-LABEL: @eq_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -35,7 +35,7 @@ define <2 x i1> @ne_t_not(<2 x i1> %a) { define <2 x i1> @ne_f_not(<2 x i1> %a) { ; CHECK-LABEL: @ne_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -54,7 +54,7 @@ define <2 x i1> @ugt_t_not(<2 x i1> %a) { define <2 x i1> @ugt_f_not(<2 x i1> %a) { ; CHECK-LABEL: @ugt_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -109,7 +109,7 @@ define <2 x i1> @slt_t_not(<2 x i1> %a) { define <2 x i1> @slt_f_not(<2 x i1> %a) { ; CHECK-LABEL: @slt_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -119,7 +119,7 @@ define <2 x i1> @slt_f_not(<2 x i1> %a) { define <2 x i1> @uge_t_not(<2 x i1> %a) { ; CHECK-LABEL: @uge_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -129,7 +129,7 @@ define <2 x i1> @uge_t_not(<2 x i1> %a) { define <2 x i1> @uge_f_not(<2 x i1> %a) { ; CHECK-LABEL: @uge_f_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp uge <2 x i1> %not, @@ -138,7 +138,7 @@ define <2 x i1> @uge_f_not(<2 x i1> %a) { define <2 x i1> @ule_t_not(<2 x i1> %a) { ; CHECK-LABEL: @ule_t_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp ule <2 x i1> %not, @@ -156,7 +156,7 @@ define <2 x i1> @ule_f_not(<2 x i1> %a) { define <2 x i1> @sge_t_not(<2 x i1> %a) { ; CHECK-LABEL: @sge_t_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp sge <2 x i1> %not, @@ -174,7 +174,7 @@ define <2 x i1> @sge_f_not(<2 x i1> %a) { define <2 x i1> @sle_t_not(<2 x i1> %a) { ; CHECK-LABEL: @sle_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -184,7 +184,7 @@ define <2 x i1> @sle_t_not(<2 x i1> %a) { define <2 x i1> @sle_f_not(<2 x i1> %a) { ; CHECK-LABEL: @sle_f_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp sle <2 x i1> %not, diff --git a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll index 09c9c1ebc83159..0bcbc3bcb050b0 100644 --- a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll +++ b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll @@ -149,7 +149,7 @@ define <2 x i1> @icmp_sgt_x_negy_fail_partial(<2 x i8> %x, <2 x i8> %yy) { define <2 x i1> @icmp_sle_x_posy(<2 x i8> %x, <2 x i8> %yy) { ; CHECK-LABEL: @icmp_sle_x_posy( -; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = and <2 x i8> %yy, @@ -187,7 +187,7 @@ define i1 @icmp_sgt_x_posy(i8 %x, i8 %y) { define <2 x i1> @icmp_sgt_negx_y(<2 x i8> %xx, <2 x i8> %y) { ; CHECK-LABEL: @icmp_sgt_negx_y( -; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %x = or <2 x i8> %xx, diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll index 93eeab4732185b..993325f6ff0b09 100644 --- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll +++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll @@ -31,7 +31,7 @@ define <2 x i1> @or_ule(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @or_slt_pos(<2 x i8> %xx, <2 x i8> %yy, <2 x i8> %z) { ; CHECK-LABEL: @or_slt_pos( ; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[Y:%.*]] = and <2 x i8> [[YY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = and <2 x i8> [[YY:%.*]], splat (i8 127) ; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], [[Y]] ; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i8> [[X]], [[XN1]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -192,7 +192,7 @@ define i1 @or_slt_intmin(i8 %x) { define <2 x i1> @or_slt_intmin_2(<2 x i8> %xx, <2 x i8> %z) { ; CHECK-LABEL: @or_slt_intmin_2( ; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i8> [[X]], [[XN1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -268,7 +268,7 @@ pos: define <2 x i1> @or_sgt_intmin_2(<2 x i8> %xx, <2 x i8> %z) { ; CHECK-LABEL: @or_sgt_intmin_2( ; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: [[XN1:%.*]] = or <2 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[X]], [[XN1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll index a4e7acbca930dc..0635bb31c44fcf 100644 --- a/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll +++ b/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll @@ -384,8 +384,8 @@ define i1 @xor_ult(i8 %x) { define <2 x i1> @xor_sgt(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @xor_sgt( -; CHECK-NEXT: [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], -; CHECK-NEXT: [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], +; CHECK-NEXT: [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 31) +; CHECK-NEXT: [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], splat (i8 64) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[X:%.*]], [[Y1]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR]], [[X]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -399,8 +399,8 @@ define <2 x i1> @xor_sgt(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @xor_sgt_fail_no_known_msb(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @xor_sgt_fail_no_known_msb( -; CHECK-NEXT: [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], -; CHECK-NEXT: [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], +; CHECK-NEXT: [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 55) +; CHECK-NEXT: [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], splat (i8 8) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[X:%.*]], [[Y1]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR]], [[X]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -426,7 +426,7 @@ define i1 @xor_slt_2(i8 %x, i8 %y, i8 %z) { define <2 x i1> @xor_sgt_intmin_2(<2 x i8> %xx, <2 x i8> %yy, <2 x i8> %z) { ; CHECK-LABEL: @xor_sgt_intmin_2( ; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[Y:%.*]] = or <2 x i8> [[YY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = or <2 x i8> [[YY:%.*]], splat (i8 -128) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[X]], [[Y]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[X]], [[XOR]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll index 56115f6d7d3414..6e054bd8d8839e 100644 --- a/llvm/test/Transforms/InstCombine/icmp-or.ll +++ b/llvm/test/Transforms/InstCombine/icmp-or.ll @@ -16,8 +16,8 @@ define i1 @set_low_bit_mask_eq(i8 %x) { define <2 x i1> @set_low_bit_mask_ne(<2 x i8> %x) { ; CHECK-LABEL: @set_low_bit_mask_ne( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -4) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 16) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = or <2 x i8> %x, @@ -220,7 +220,7 @@ define i1 @eq_const_mask_use2(i8 %x, i8 %y) { define <2 x i1> @decrement_slt_0(<2 x i8> %x) { ; CHECK-LABEL: @decrement_slt_0( -; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %dec = add <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll b/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll index a151fd96ef4981..edbaffa6a12b9b 100644 --- a/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll +++ b/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll @@ -373,7 +373,7 @@ define i1 @icmp_power2_and_icmp_shifted_mask_swapped_8_12_mask_overlap_fail(i32 ; Vector of 1 reduction define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147483647(<1 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147483647( -; CHECK-NEXT: [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], +; CHECK-NEXT: [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: ret <1 x i1> [[T4]] ; %t1 = icmp ult <1 x i32> %x, @@ -385,7 +385,7 @@ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147483647( define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147483647(<1 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147483647( -; CHECK-NEXT: [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], +; CHECK-NEXT: [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], splat (i32 2147483647) ; CHECK-NEXT: ret <1 x i1> [[T4]] ; %t1 = icmp ult <1 x i32> %x, @@ -529,9 +529,9 @@ define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_64_others(<6 x ; Vector of 0 of 1 compatible, no change define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147482647_gap_in_mask_fail(<1 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147482647_gap_in_mask_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], -; CHECK-NEXT: [[T2:%.*]] = and <1 x i32> [[X]], -; CHECK-NEXT: [[T3:%.*]] = icmp ne <1 x i32> [[T2]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[T2:%.*]] = and <1 x i32> [[X]], splat (i32 2147482647) +; CHECK-NEXT: [[T3:%.*]] = icmp ne <1 x i32> [[T2]], splat (i32 2147482647) ; CHECK-NEXT: [[T4:%.*]] = and <1 x i1> [[T1]], [[T3]] ; CHECK-NEXT: ret <1 x i1> [[T4]] ; @@ -544,9 +544,9 @@ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147482647_ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147482647_gap_in_mask_fail(<1 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147482647_gap_in_mask_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], -; CHECK-NEXT: [[T2:%.*]] = and <1 x i32> [[X]], -; CHECK-NEXT: [[T3:%.*]] = icmp ne <1 x i32> [[T2]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[T2:%.*]] = and <1 x i32> [[X]], splat (i32 2147482647) +; CHECK-NEXT: [[T3:%.*]] = icmp ne <1 x i32> [[T2]], splat (i32 2147482647) ; CHECK-NEXT: [[T4:%.*]] = and <1 x i1> [[T3]], [[T1]] ; CHECK-NEXT: ret <1 x i1> [[T4]] ; @@ -560,7 +560,7 @@ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_214 ; Vector 1 of 2 compatible, no change define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_1073741823_gap_between_masks_fail(<2 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_2147483648_1073741823_gap_between_masks_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T2:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <2 x i1> [[T1]], [[T3]] @@ -575,7 +575,7 @@ define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_1073741823_ define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_1073741823_gap_between_masks_fail(<2 x i32> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_1073741823_gap_between_masks_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T2:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <2 x i1> [[T3]], [[T1]] @@ -591,7 +591,7 @@ define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_107 ; Vector 1 of 7 compatible, no change define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_1_of_7_fail(<7 x i8> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_128_1_of_7_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[T2:%.*]] = and <7 x i8> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <7 x i8> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <7 x i1> [[T1]], [[T3]] @@ -606,7 +606,7 @@ define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_1_of_7_fail(<7 x i define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_1_of_7_fail(<7 x i8> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_1_of_7_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[T2:%.*]] = and <7 x i8> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <7 x i8> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <7 x i1> [[T3]], [[T1]] @@ -622,7 +622,7 @@ define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_1_of_7_fai ; Vector 0 of 6 compatible, no change define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_0_of_6_fail(<6 x i8> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_128_0_of_6_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[T2:%.*]] = and <6 x i8> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <6 x i8> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <6 x i1> [[T1]], [[T3]] @@ -637,7 +637,7 @@ define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_0_of_6_fail(<6 x i define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_0_of_6_fail(<6 x i8> %x) { ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_0_of_6_fail( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[T2:%.*]] = and <6 x i8> [[X]], ; CHECK-NEXT: [[T3:%.*]] = icmp ne <6 x i8> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <6 x i1> [[T3]], [[T1]] diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll index 2db5bad17b1977..97ed552b9a6da2 100644 --- a/llvm/test/Transforms/InstCombine/icmp-range.ll +++ b/llvm/test/Transforms/InstCombine/icmp-range.ll @@ -238,7 +238,7 @@ define <2 x i1> @test_two_ranges_vec_false(ptr nocapture readonly %arg1, ptr noc ; Values' ranges do not overlap each other, so it can simplified to true. define <2 x i1> @test_two_ranges_vec_true(ptr nocapture readonly %arg1, ptr nocapture readonly %arg2) { ; CHECK-LABEL: @test_two_ranges_vec_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %val1 = load <2 x i32>, ptr %arg1, !range !0 %val2 = load <2 x i32>, ptr %arg2, !range !6 @@ -268,7 +268,7 @@ define <2 x i1> @test_two_argument_ranges_vec_false(<2 x i32> range(i32 1, 6) %a ; Values' ranges do not overlap each other, so it can simplified to true. define <2 x i1> @test_two_argument_ranges_vec_true(<2 x i32> range(i32 1, 6) %arg1, <2 x i32> range(i32 8, 16) %arg2) { ; CHECK-LABEL: @test_two_argument_ranges_vec_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %rval = icmp ugt <2 x i32> %arg2, %arg1 ret <2 x i1> %rval @@ -545,7 +545,7 @@ define i1 @sub_ule_zext(i1 %b, i8 %x, i8 %y) { define <2 x i1> @sub_ult_and(<2 x i8> %b, <2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_ult_and( -; CHECK-NEXT: [[A:%.*]] = and <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[A:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 1) ; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[S]], [[A]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -775,7 +775,7 @@ define i1 @sub_ult_sext(i1 %b, i8 %x, i8 %y) { define <2 x i1> @sub_ule_ashr(<2 x i8> %b, <2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_ule_ashr( -; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[B:%.*]], splat (i8 7) ; CHECK-NEXT: [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[S]], [[A]] ; CHECK-NEXT: ret <2 x i1> [[R]] @@ -1017,7 +1017,7 @@ define i1 @zext_sext_add_icmp_ugt_1(i1 %a, i1 %b) { define <2 x i1> @vector_zext_sext_add_icmp_slt_1(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @vector_zext_sext_add_icmp_slt_1( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[B:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -1335,7 +1335,7 @@ define i1 @icmp_eq_zext_ne_non_boolean(i32 %a) { define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_zext_eq_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp eq <2 x i32> %a, %conv = zext <2 x i1> %cmp to <2 x i32> @@ -1345,7 +1345,7 @@ define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_zext_ne_zero_vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp ne <2 x i32> %a, @@ -1356,7 +1356,7 @@ define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_zext_eq_one_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_zext_eq_one_vec( -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, @@ -1367,7 +1367,7 @@ define <2 x i1> @icmp_ne_zext_eq_one_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_zext_ne_one_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_zext_ne_one_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp ne <2 x i32> %a, %conv = zext <2 x i1> %cmp to <2 x i32> @@ -1520,7 +1520,7 @@ define i1 @icmp_eq_sext_ne_otherwise(i32 %a) { define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp eq <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> @@ -1530,8 +1530,8 @@ define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_zero_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -1) +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp ne <2 x i32> %a, @@ -1542,8 +1542,8 @@ define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_eq_allones_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -1) +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -2) ; CHECK-NEXT: ret <2 x i1> [[CMP1]] ; %cmp = icmp eq <2 x i32> %a, @@ -1554,7 +1554,7 @@ define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) { define <2 x i1> @icmp_ne_sext_ne_allones_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_sext_ne_allones_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp ne <2 x i32> %a, %conv = sext <2 x i1> %cmp to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/icmp-rotate.ll b/llvm/test/Transforms/InstCombine/icmp-rotate.ll index eeaa1c78610976..c2d03b1786535c 100644 --- a/llvm/test/Transforms/InstCombine/icmp-rotate.ll +++ b/llvm/test/Transforms/InstCombine/icmp-rotate.ll @@ -161,7 +161,7 @@ define i1 @ror_ne_cst(i8 %x) { define <2 x i1> @rol_eq_cst_vec(<2 x i5> %x) { ; CHECK-LABEL: @rol_eq_cst_vec( -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i5> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i5> [[X:%.*]], splat (i5 8) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %f = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5> %x, <2 x i5> %x, <2 x i5> ) @@ -171,7 +171,7 @@ define <2 x i1> @rol_eq_cst_vec(<2 x i5> %x) { define <2 x i1> @rol_eq_cst_undef(<2 x i5> %x) { ; CHECK-LABEL: @rol_eq_cst_undef( -; CHECK-NEXT: [[F:%.*]] = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5> [[X:%.*]], <2 x i5> [[X]], <2 x i5> ) +; CHECK-NEXT: [[F:%.*]] = tail call <2 x i5> @llvm.fshl.v2i5(<2 x i5> [[X:%.*]], <2 x i5> [[X]], <2 x i5> splat (i5 3)) ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i5> [[F]], ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-select.ll b/llvm/test/Transforms/InstCombine/icmp-select.ll index fb68c6ee942075..0bdbc88ba67c67 100644 --- a/llvm/test/Transforms/InstCombine/icmp-select.ll +++ b/llvm/test/Transforms/InstCombine/icmp-select.ll @@ -485,7 +485,7 @@ define i1 @select_constants_and_icmp_ne0_all_uses(i1 %x, i1 %y) { define <2 x i1> @select_constants_and_icmp_ne0_vec_splat(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @select_constants_and_icmp_ne0_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = xor <2 x i1> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %s1 = select <2 x i1> %x, <2 x i9> , <2 x i9> diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll b/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll index 0803c03efdbf49..936bcfda1fc9a0 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll @@ -16,7 +16,7 @@ define i1 @icmp_shl_ugt_1(i8 %x) { define <2 x i1> @icmp_shl_ugt_2(<2 x i32> %_x) { ; CHECK-LABEL: @icmp_shl_ugt_2( -; CHECK-NEXT: [[X:%.*]] = add <2 x i32> [[_X:%.*]], +; CHECK-NEXT: [[X:%.*]] = add <2 x i32> [[_X:%.*]], splat (i32 42) ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -28,7 +28,7 @@ define <2 x i1> @icmp_shl_ugt_2(<2 x i32> %_x) { define <3 x i1> @icmp_shl_uge_1(<3 x i7> %x) { ; CHECK-LABEL: @icmp_shl_uge_1( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i7> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i7> [[X:%.*]], splat (i7 -1) ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %add = shl <3 x i7> %x, @@ -63,8 +63,8 @@ define i1 @icmp_shl_ult_1(i16 %x) { define <4 x i1> @icmp_shl_ult_2(<4 x i4> %_x) { ; CHECK-LABEL: @icmp_shl_ult_2( -; CHECK-NEXT: [[X:%.*]] = add <4 x i4> [[_X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = shl <4 x i4> [[X]], +; CHECK-NEXT: [[X:%.*]] = add <4 x i4> [[_X:%.*]], splat (i4 -6) +; CHECK-NEXT: [[ADD:%.*]] = shl <4 x i4> [[X]], splat (i4 1) ; CHECK-NEXT: call void @usev4(<4 x i4> [[ADD]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i4> [[X]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] @@ -78,7 +78,7 @@ define <4 x i1> @icmp_shl_ult_2(<4 x i4> %_x) { define <2 x i1> @icmp_shl_ule_1(<2 x i8> %x) { ; CHECK-LABEL: @icmp_shl_ule_1( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = shl <2 x i8> %x, @@ -110,7 +110,7 @@ define i1 @icmp_shl_eq_1(i8 %x) { define <2 x i1> @icmp_shl_eq_2(<2 x i8> %_x) { ; CHECK-LABEL: @icmp_shl_eq_2( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[_X:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[_X:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll index 6b9ea1f8ef97e2..06175f1635a90d 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll @@ -194,7 +194,7 @@ define i1 @icmp_sgt11(i8 %x) { define <2 x i1> @icmp_sgt11_vec(<2 x i8> %x) { ; CHECK-LABEL: @icmp_sgt11_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl nsw <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll index 9f50265004f01b..abd2021fae116f 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll @@ -43,7 +43,7 @@ define <2 x i1> @icmp_ule_16x2(<2 x i64>) { define <2 x i1> @icmp_ule_16x2_nonzero(<2 x i64>) { ; CHECK-LABEL: @icmp_ule_16x2_nonzero( -; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], splat (i64 4) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = shl nuw <2 x i64> %0, @@ -53,7 +53,7 @@ define <2 x i1> @icmp_ule_16x2_nonzero(<2 x i64>) { define <2 x i1> @icmp_ule_12x2(<2 x i64>) { ; CHECK-LABEL: @icmp_ule_12x2( -; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], splat (i64 4) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = shl nuw <2 x i64> %0, @@ -73,7 +73,7 @@ define i1 @icmp_ult_8(i64) { define <2 x i1> @icmp_uge_8x2(<2 x i16>) { ; CHECK-LABEL: @icmp_uge_8x2( -; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i16> [[TMP0:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i16> [[TMP0:%.*]], splat (i16 15) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = shl nuw <2 x i16> %0, @@ -83,7 +83,7 @@ define <2 x i1> @icmp_uge_8x2(<2 x i16>) { define <2 x i1> @icmp_ugt_16x2(<2 x i32>) { ; CHECK-LABEL: @icmp_ugt_16x2( -; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], splat (i32 15) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %c = shl nuw <2 x i32> %0, diff --git a/llvm/test/Transforms/InstCombine/icmp-shl.ll b/llvm/test/Transforms/InstCombine/icmp-shl.ll index 5295bd01aa0899..2b8ec33bcf34dc 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shl.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shl.ll @@ -67,7 +67,7 @@ define <2 x i1> @shl_vec_nsw_slt_1_0_todo_non_splat(<2 x i8> %x, <2 x i8> %C) { define <2 x i1> @shl_nsw_sle_n1(<2 x i8> %x, <2 x i8> %C) { ; CHECK-LABEL: @shl_nsw_sle_n1( ; CHECK-NEXT: [[Y:%.*]] = shl nsw <2 x i8> [[X:%.*]], [[C:%.*]] -; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[Y]], +; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = shl nsw <2 x i8> %x, %C @@ -150,7 +150,7 @@ define i1 @shl_nuw_ugt_Csle0_fail_missing_flag(i8 %x, i8 %C) { define <2 x i1> @shl_nsw_nuw_sgt_Csle0(<2 x i8> %x, <2 x i8> %C) { ; CHECK-LABEL: @shl_nsw_nuw_sgt_Csle0( -; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -10) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = shl nsw nuw <2 x i8> %x, %C @@ -171,7 +171,7 @@ define <2 x i1> @shl_nsw_nuw_sge_Csle0_todo_non_splat(<2 x i8> %x, <2 x i8> %C) define <2 x i1> @shl_nsw_nuw_sle_Csle0(<2 x i8> %x, <2 x i8> %C) { ; CHECK-LABEL: @shl_nsw_nuw_sle_Csle0( -; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 -5) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = shl nsw nuw <2 x i8> %x, %C @@ -182,7 +182,7 @@ define <2 x i1> @shl_nsw_nuw_sle_Csle0(<2 x i8> %x, <2 x i8> %C) { define <2 x i1> @shl_nsw_nuw_slt_Csle0_fail_positive(<2 x i8> %x, <2 x i8> %C) { ; CHECK-LABEL: @shl_nsw_nuw_slt_Csle0_fail_positive( ; CHECK-NEXT: [[Y:%.*]] = shl nuw nsw <2 x i8> [[X:%.*]], [[C:%.*]] -; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[Y]], +; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i8> [[Y]], splat (i8 6) ; CHECK-NEXT: ret <2 x i1> [[Z]] ; %y = shl nsw nuw <2 x i8> %x, %C diff --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll index 5f09964fd93ad5..70f25391847614 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll @@ -2454,7 +2454,7 @@ define i1 @ashr_00_00_ashr_extra_use(i8 %x, ptr %ptr) { define <4 x i1> @ashr_00_00_vec(<4 x i8> %x) { ; CHECK-LABEL: @ashr_00_00_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i8> [[X:%.*]], splat (i8 88) ; CHECK-NEXT: ret <4 x i1> [[C]] ; %s = ashr exact <4 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll index bdcba9ed1549b9..8aceba04e0aeb4 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll @@ -17,7 +17,7 @@ define i1 @lshr_eq_msb_low_last_zero(i8 %a) { define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) { ; CHECK-LABEL: @lshr_eq_msb_low_last_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[A:%.*]], splat (i8 6) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shr = lshr <2 x i8> , %a @@ -1358,7 +1358,7 @@ define i1 @exactly_one_set_signbit_use1(i8 %x, i8 %y) { define <2 x i1> @same_signbit(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @same_signbit( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R1]] ; %xsign = lshr <2 x i8> %x, @@ -1519,7 +1519,7 @@ define i1 @exactly_one_set_signbit_use1_signed(i8 %x, i8 %y) { define <2 x i1> @same_signbit_signed(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @same_signbit_signed( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[R1]] ; %xsign = ashr <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-signmask.ll b/llvm/test/Transforms/InstCombine/icmp-signmask.ll index 5424f7d7e8021f..4127d4071b4f11 100644 --- a/llvm/test/Transforms/InstCombine/icmp-signmask.ll +++ b/llvm/test/Transforms/InstCombine/icmp-signmask.ll @@ -45,7 +45,7 @@ define <2 x i1> @cmp_x_and_negp2_with_ne_or_z(<2 x i8> %x) { define <2 x i1> @cmp_x_and_negp2_with_ne_fail_not_p2(<2 x i8> %x) { ; CHECK-LABEL: @cmp_x_and_negp2_with_ne_fail_not_p2( ; CHECK-NEXT: [[ANDX:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %andx = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp-sub.ll b/llvm/test/Transforms/InstCombine/icmp-sub.ll index 8cb3c1c181cec7..4143902bc9c46b 100644 --- a/llvm/test/Transforms/InstCombine/icmp-sub.ll +++ b/llvm/test/Transforms/InstCombine/icmp-sub.ll @@ -164,7 +164,7 @@ define <2 x i1> @icmp_eq_sub_non_splat(<2 x i32> %a) { define <2 x i1> @icmp_eq_sub_undef2(<2 x i32> %a) { ; CHECK-LABEL: @icmp_eq_sub_undef2( -; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> , [[A:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 15), [[A:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[SUB]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -175,7 +175,7 @@ define <2 x i1> @icmp_eq_sub_undef2(<2 x i32> %a) { define <2 x i1> @icmp_eq_sub_non_splat2(<2 x i32> %a) { ; CHECK-LABEL: @icmp_eq_sub_non_splat2( -; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> , [[A:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> splat (i32 15), [[A:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[SUB]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -312,8 +312,8 @@ define i1 @neg_slt_42(i128 %x) { define <2 x i1> @neg_ugt_42_splat(<2 x i7> %x) { ; CHECK-LABEL: @neg_ugt_42_splat( -; CHECK-NEXT: [[NOTSUB:%.*]] = add <2 x i7> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i7> [[NOTSUB]], +; CHECK-NEXT: [[NOTSUB:%.*]] = add <2 x i7> [[X:%.*]], splat (i7 -1) +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i7> [[NOTSUB]], splat (i7 -43) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %negx = sub <2 x i7> zeroinitializer, %x @@ -606,7 +606,7 @@ entry: define <2 x i1> @PR60818_ne_vector(<2 x i32> %a) { ; CHECK-LABEL: @PR60818_ne_vector( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP0]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-trunc.ll b/llvm/test/Transforms/InstCombine/icmp-trunc.ll index 4de64d85bbf25a..b85deabf5fa060 100644 --- a/llvm/test/Transforms/InstCombine/icmp-trunc.ll +++ b/llvm/test/Transforms/InstCombine/icmp-trunc.ll @@ -17,7 +17,7 @@ define i1 @ult_2(i32 %x) { define <2 x i1> @ult_16_splat(<2 x i16> %x) { ; CHECK-LABEL: @ult_16_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2032) ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -83,8 +83,8 @@ define i1 @ult_192(i32 %x) { define <2 x i1> @ult_2044_splat(<2 x i16> %x) { ; CHECK-LABEL: @ult_2044_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2044) +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], splat (i16 2044) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t = trunc <2 x i16> %x to <2 x i11> @@ -133,7 +133,7 @@ define i1 @ugt_3(i32 %x) { define <2 x i1> @ugt_7_splat(<2 x i16> %x) { ; CHECK-LABEL: @ugt_7_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2040) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -183,8 +183,8 @@ define i1 @ugt_253(i32 %x) { define <2 x i1> @ugt_2043_splat(<2 x i16> %x) { ; CHECK-LABEL: @ugt_2043_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2044) +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], splat (i16 2044) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t = trunc <2 x i16> %x to <2 x i11> @@ -233,7 +233,7 @@ define i1 @slt_0(i32 %x) { define <2 x i1> @slt_0_splat(<2 x i16> %x) { ; CHECK-LABEL: @slt_0_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 1024) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -279,7 +279,7 @@ define i1 @sgt_n1(i32 %x) { define <2 x i1> @sgt_n1_splat(<2 x i16> %x) { ; CHECK-LABEL: @sgt_n1_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 1024) ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -331,7 +331,7 @@ define i1 @trunc_eq_i32_i8(i32 %x) { define <2 x i1> @trunc_eq_v2i32_v2i8(<2 x i32> %x) { ; CHECK-LABEL: @trunc_eq_v2i32_v2i8( ; CHECK-NEXT: [[T:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[T]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[T]], splat (i8 42) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t = trunc <2 x i32> %x to <2 x i8> @@ -368,7 +368,7 @@ define i1 @shl1_trunc_eq0(i32 %a) { define <2 x i1> @shl1_trunc_ne0(<2 x i8> %a) { ; CHECK-LABEL: @shl1_trunc_ne0( -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 5) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %shl = shl <2 x i8> , %a @@ -765,7 +765,7 @@ define i1 @sge_nsw_i48(i48 %x) { define <2 x i1> @uge_nsw(<2 x i32> %x) { ; CHECK-LABEL: @uge_nsw( ; CHECK-NEXT: [[T:%.*]] = trunc nsw <2 x i32> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[T]], +; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[T]], splat (i8 -46) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t = trunc nsw <2 x i32> %x to <2 x i8> diff --git a/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll b/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll index a61feac024a9c3..fc582708e0b750 100644 --- a/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll +++ b/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll @@ -8,8 +8,8 @@ ; Basic tests with one user ; ============================================================================== define i1 @icmp_eq_basic(i8 %arg) { -; CHECK-LABEL: define i1 @icmp_eq_basic -; CHECK-SAME: (i8 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_eq_basic( +; CHECK-SAME: i8 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ARG]], 3 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -19,8 +19,8 @@ define i1 @icmp_eq_basic(i8 %arg) { } define i1 @icmp_ne_basic(i16 %arg) { -; CHECK-LABEL: define i1 @icmp_ne_basic -; CHECK-SAME: (i16 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_ne_basic( +; CHECK-SAME: i16 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i16 [[ARG]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -30,8 +30,8 @@ define i1 @icmp_ne_basic(i16 %arg) { } define i1 @icmp_ule_basic(i32 %arg) { -; CHECK-LABEL: define i1 @icmp_ule_basic -; CHECK-SAME: (i32 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_ule_basic( +; CHECK-SAME: i32 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ARG]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -41,8 +41,8 @@ define i1 @icmp_ule_basic(i32 %arg) { } define i1 @icmp_ult_basic(i64 %arg) { -; CHECK-LABEL: define i1 @icmp_ult_basic -; CHECK-SAME: (i64 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_ult_basic( +; CHECK-SAME: i64 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[ARG]], 15 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -52,8 +52,8 @@ define i1 @icmp_ult_basic(i64 %arg) { } define i1 @icmp_uge_basic(i8 %arg) { -; CHECK-LABEL: define i1 @icmp_uge_basic -; CHECK-SAME: (i8 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_uge_basic( +; CHECK-SAME: i8 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[ARG]], 3 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -63,8 +63,8 @@ define i1 @icmp_uge_basic(i8 %arg) { } define i1 @icmp_ugt_basic(i16 %arg) { -; CHECK-LABEL: define i1 @icmp_ugt_basic -; CHECK-SAME: (i16 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_ugt_basic( +; CHECK-SAME: i16 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ARG]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -74,8 +74,8 @@ define i1 @icmp_ugt_basic(i16 %arg) { } define i1 @icmp_sle_basic(i32 %arg) { -; CHECK-LABEL: define i1 @icmp_sle_basic -; CHECK-SAME: (i32 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_sle_basic( +; CHECK-SAME: i32 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ARG]], 2147483637 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -85,8 +85,8 @@ define i1 @icmp_sle_basic(i32 %arg) { } define i1 @icmp_slt_basic(i64 %arg) { -; CHECK-LABEL: define i1 @icmp_slt_basic -; CHECK-SAME: (i64 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_slt_basic( +; CHECK-SAME: i64 [[ARG:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[ARG]], 9223372036854775783 ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -96,8 +96,8 @@ define i1 @icmp_slt_basic(i64 %arg) { } define i1 @icmp_sge_basic(i8 %arg) { -; CHECK-LABEL: define i1 @icmp_sge_basic -; CHECK-SAME: (i8 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_sge_basic( +; CHECK-SAME: i8 [[ARG:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[ARG]], -3 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], 124 ; CHECK-NEXT: ret i1 [[CMP]] @@ -108,8 +108,8 @@ define i1 @icmp_sge_basic(i8 %arg) { } define i1 @icmp_sgt_basic(i16 %arg) { -; CHECK-LABEL: define i1 @icmp_sgt_basic -; CHECK-SAME: (i16 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_sgt_basic( +; CHECK-SAME: i16 [[ARG:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ARG]], -4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[TMP1]], 32762 ; CHECK-NEXT: ret i1 [[CMP]] @@ -123,8 +123,8 @@ define i1 @icmp_sgt_basic(i16 %arg) { ; Tests with more than user ; ============================================================================== define i1 @icmp_eq_multiuse(i8 %arg) { -; CHECK-LABEL: define i1 @icmp_eq_multiuse -; CHECK-SAME: (i8 [[ARG:%.*]]) { +; CHECK-LABEL: define i1 @icmp_eq_multiuse( +; CHECK-SAME: i8 [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[ARG]], i8 2) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ADD]], 5 ; CHECK-NEXT: call void @use.i8(i8 [[ADD]]) @@ -140,9 +140,9 @@ define i1 @icmp_eq_multiuse(i8 %arg) { ; Tests with vector types ; ============================================================================== define <2 x i1> @icmp_eq_vector_equal(<2 x i8> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_equal -; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], +; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_equal( +; CHECK-SAME: <2 x i8> [[ARG:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %arg, <2 x i8> ) @@ -151,8 +151,8 @@ define <2 x i1> @icmp_eq_vector_equal(<2 x i8> %arg) { } define <2 x i1> @icmp_eq_vector_unequal(<2 x i8> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_unequal -; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { +; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_unequal( +; CHECK-SAME: <2 x i8> [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> ) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] @@ -163,9 +163,9 @@ define <2 x i1> @icmp_eq_vector_unequal(<2 x i8> %arg) { } define <2 x i1> @icmp_ne_vector_equal(<2 x i16> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_equal -; CHECK-SAME: (<2 x i16> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], +; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_equal( +; CHECK-SAME: <2 x i16> [[ARG:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], splat (i16 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %arg, <2 x i16> ) @@ -174,8 +174,8 @@ define <2 x i1> @icmp_ne_vector_equal(<2 x i16> %arg) { } define <2 x i1> @icmp_ne_vector_unequal(<2 x i16> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_unequal -; CHECK-SAME: (<2 x i16> [[ARG:%.*]]) { +; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_unequal( +; CHECK-SAME: <2 x i16> [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG]], <2 x i16> ) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] @@ -186,9 +186,9 @@ define <2 x i1> @icmp_ne_vector_unequal(<2 x i16> %arg) { } define <2 x i1> @icmp_ule_vector_equal(<2 x i32> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_equal -; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], +; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_equal( +; CHECK-SAME: <2 x i32> [[ARG:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %arg, <2 x i32> ) @@ -197,8 +197,8 @@ define <2 x i1> @icmp_ule_vector_equal(<2 x i32> %arg) { } define <2 x i1> @icmp_ule_vector_unequal(<2 x i32> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_unequal -; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) { +; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_unequal( +; CHECK-SAME: <2 x i32> [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[ARG]], <2 x i32> ) ; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] @@ -209,9 +209,9 @@ define <2 x i1> @icmp_ule_vector_unequal(<2 x i32> %arg) { } define <2 x i1> @icmp_sgt_vector_equal(<2 x i64> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_equal -; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[ARG]], +; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_equal( +; CHECK-SAME: <2 x i64> [[ARG:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[ARG]], splat (i64 9223372036854366185) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %arg, <2 x i64> ) @@ -220,8 +220,8 @@ define <2 x i1> @icmp_sgt_vector_equal(<2 x i64> %arg) { } define <2 x i1> @icmp_sgt_vector_unequal(<2 x i64> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_unequal -; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) { +; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_unequal( +; CHECK-SAME: <2 x i64> [[ARG:%.*]]) { ; CHECK-NEXT: [[ADD:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> [[ARG]], <2 x i64> ) ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i64> [[ADD]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] @@ -235,10 +235,10 @@ define <2 x i1> @icmp_sgt_vector_unequal(<2 x i64> %arg) { ; Tests with vector types and multiple uses ; ============================================================================== define <2 x i1> @icmp_eq_vector_multiuse_equal(<2 x i8> %arg) { -; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_equal -; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> ) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ADD]], +; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_equal( +; CHECK-SAME: <2 x i8> [[ARG:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> splat (i8 2)) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ADD]], splat (i8 5) ; CHECK-NEXT: call void @use.v2i8(<2 x i8> [[ADD]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll index 32ef6267cdf8b4..347d35548164b2 100644 --- a/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll @@ -39,7 +39,7 @@ define i1 @p0(i8 %val, i8 %bits) { define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[VAL_HIGHBITS]], zeroinitializer @@ -56,7 +56,7 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp uge <3 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -84,7 +84,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %val, <3 x i8> %bits) { define <3 x i1> @p2_vec_poison1(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_poison1( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer @@ -216,7 +216,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -229,7 +229,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], ; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]] diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll index c185e632b5194f..568faea6fc9ab1 100644 --- a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll @@ -198,7 +198,7 @@ define i1 @n1(i8 %val, i8 %bits) { define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[VAL:%.*]], [[T1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -210,7 +210,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[BITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], ; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll index 25894a22f00751..34fc0002eee168 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll @@ -39,7 +39,7 @@ define i1 @p0(i8 %val, i8 %bits) { define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @p1_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS]] ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[VAL_HIGHBITS]], zeroinitializer @@ -56,7 +56,7 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> , [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ult <3 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -84,7 +84,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %val, <3 x i8> %bits) { define <3 x i1> @p2_vec_poison1(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_poison1( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]] ; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer @@ -216,7 +216,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -229,7 +229,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]] ; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], ; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]] diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll index c1912e11b93abf..d114a979d779d4 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll @@ -198,7 +198,7 @@ define i1 @n1(i8 %val, i8 %bits) { define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> , [[BITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], +; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[VAL:%.*]], [[T1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -210,7 +210,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) { ; CHECK-LABEL: @n3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> , [[BITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[BITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], ; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll b/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll index 87257e40ac6a2c..2cd07b17af5806 100644 --- a/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll +++ b/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll @@ -275,7 +275,7 @@ define i1 @icmp_eq_multiuse_negative(i8 %arg) { define <2 x i1> @icmp_eq_vector_positive_equal(<2 x i8> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_positive_equal ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], splat (i8 7) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %arg, <2 x i8> ) @@ -298,7 +298,7 @@ define <2 x i1> @icmp_eq_vector_positive_unequal(<2 x i8> %arg) { define <2 x i1> @icmp_ne_vector_positive_equal(<2 x i16> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_positive_equal ; CHECK-SAME: (<2 x i16> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], splat (i16 37) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %arg, <2 x i16> ) @@ -321,7 +321,7 @@ define <2 x i1> @icmp_ne_vector_positive_unequal(<2 x i16> %arg) { define <2 x i1> @icmp_ule_vector_positive_equal(<2 x i32> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_positive_equal ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], splat (i32 37) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %arg, <2 x i32> ) @@ -344,8 +344,8 @@ define <2 x i1> @icmp_ule_vector_positive_unequal(<2 x i32> %arg) { define <2 x i1> @icmp_sgt_vector_positive_equal(<2 x i64> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_positive_equal ; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[ARG]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[ARG]], splat (i64 -410858) +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[TMP1]], splat (i64 9223372036854774573) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %arg, <2 x i64> ) @@ -371,7 +371,7 @@ define <2 x i1> @icmp_sgt_vector_positive_unequal(<2 x i64> %arg) { define <2 x i1> @icmp_eq_vector_negative_equal(<2 x i8> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_negative_equal ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], splat (i8 -3) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %arg, <2 x i8> ) @@ -397,8 +397,8 @@ define <2 x i1> @icmp_eq_vector_negative_unequal(<2 x i8> %arg) { define <2 x i1> @icmp_eq_vector_multiuse_positive_equal(<2 x i8> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_positive_equal ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> ) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], +; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> splat (i8 2)) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], splat (i8 5) ; CHECK-NEXT: call void @use.v2i8(<2 x i8> [[SUB]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -414,8 +414,8 @@ define <2 x i1> @icmp_eq_vector_multiuse_positive_equal(<2 x i8> %arg) { define <2 x i1> @icmp_eq_vector_multiuse_negative_equal(<2 x i8> %arg) { ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_negative_equal ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) { -; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> ) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], +; CHECK-NEXT: [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> splat (i8 -20)) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], splat (i8 5) ; CHECK-NEXT: call void @use.v2i8(<2 x i8> [[SUB]]) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll b/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll index 677655a0a99ab6..3678946e631a24 100644 --- a/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll @@ -44,7 +44,7 @@ define <2 x i1> @ule(<2 x i8> %x) { define <2 x i1> @ult_min_signed_value(<2 x i8> %x) { ; CHECK-LABEL: @ult_min_signed_value( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp ult <2 x i8> %x, @@ -55,7 +55,7 @@ define <2 x i1> @ult_min_signed_value(<2 x i8> %x) { define <2 x i1> @sge_zero(<2 x i8> %x) { ; CHECK-LABEL: @sge_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp sge <2 x i8> %x, @@ -64,7 +64,7 @@ define <2 x i1> @sge_zero(<2 x i8> %x) { define <2 x i1> @uge_zero(<2 x i8> %x) { ; CHECK-LABEL: @uge_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp uge <2 x i8> %x, ret <2 x i1> %cmp @@ -72,7 +72,7 @@ define <2 x i1> @uge_zero(<2 x i8> %x) { define <2 x i1> @sle_zero(<2 x i8> %x) { ; CHECK-LABEL: @sle_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp sle <2 x i8> %x, @@ -291,7 +291,7 @@ define <2 x i1> @same_shuffle_inputs_icmp_extra_use3(<4 x i8> %x, <4 x i8> %y) { define <4 x i1> @splat_icmp(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -302,7 +302,7 @@ define <4 x i1> @splat_icmp(<4 x i8> %x) { define <4 x i1> @splat_icmp_poison(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -313,7 +313,7 @@ define <4 x i1> @splat_icmp_poison(<4 x i8> %x) { define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { ; CHECK-LABEL: @splat_icmp_larger_size( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -324,7 +324,7 @@ define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { define <4 x i1> @splat_fcmp_smaller_size(<5 x float> %x) { ; CHECK-LABEL: @splat_fcmp_smaller_size( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], splat (float 4.200000e+01) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <5 x i1> [[TMP1]], <5 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -339,7 +339,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp_extra_use( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: call void @use_v4i8(<4 x i8> [[SPLATX]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> @@ -353,7 +353,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { define <4 x i1> @not_splat_icmp(<4 x i8> %x) { ; CHECK-LABEL: @not_splat_icmp( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll index 1e392635936b7e..333b7519c80712 100644 --- a/llvm/test/Transforms/InstCombine/icmp-vec.ll +++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll @@ -46,7 +46,7 @@ define <2 x i1> @ule(<2 x i8> %x) { define <2 x i1> @ult_min_signed_value(<2 x i8> %x) { ; CHECK-LABEL: @ult_min_signed_value( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp ult <2 x i8> %x, @@ -57,7 +57,7 @@ define <2 x i1> @ult_min_signed_value(<2 x i8> %x) { define <2 x i1> @sge_zero(<2 x i8> %x) { ; CHECK-LABEL: @sge_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp sge <2 x i8> %x, @@ -66,7 +66,7 @@ define <2 x i1> @sge_zero(<2 x i8> %x) { define <2 x i1> @uge_zero(<2 x i8> %x) { ; CHECK-LABEL: @uge_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp uge <2 x i8> %x, ret <2 x i1> %cmp @@ -74,7 +74,7 @@ define <2 x i1> @uge_zero(<2 x i8> %x) { define <2 x i1> @sle_zero(<2 x i8> %x) { ; CHECK-LABEL: @sle_zero( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp sle <2 x i8> %x, @@ -293,7 +293,7 @@ define <2 x i1> @same_shuffle_inputs_icmp_extra_use3(<4 x i8> %x, <4 x i8> %y) { define <4 x i1> @splat_icmp(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -304,7 +304,7 @@ define <4 x i1> @splat_icmp(<4 x i8> %x) { define <4 x i1> @splat_icmp_poison(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -315,7 +315,7 @@ define <4 x i1> @splat_icmp_poison(<4 x i8> %x) { define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { ; CHECK-LABEL: @splat_icmp_larger_size( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -326,7 +326,7 @@ define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { define <4 x i1> @splat_fcmp_smaller_size(<5 x float> %x) { ; CHECK-LABEL: @splat_fcmp_smaller_size( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], splat (float 4.200000e+01) ; CHECK-NEXT: [[CMP:%.*]] = shufflevector <5 x i1> [[TMP1]], <5 x i1> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -341,7 +341,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { ; CHECK-LABEL: @splat_icmp_extra_use( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: call void @use_v4i8(<4 x i8> [[SPLATX]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> @@ -355,7 +355,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { define <4 x i1> @not_splat_icmp(<4 x i8> %x) { ; CHECK-LABEL: @not_splat_icmp( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> @@ -381,7 +381,7 @@ define <4 x i1> @not_splat_icmp2(<4 x i8> %x) { define <2 x i1> @icmp_logical_or_vec(<2 x i64> %x, <2 x i64> %y, <2 x i1> %falseval) { ; CHECK-LABEL: @icmp_logical_or_vec( ; CHECK-NEXT: [[CMP_NE:%.*]] = icmp ne <2 x i64> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP_NE]], <2 x i1> , <2 x i1> [[FALSEVAL:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP_NE]], <2 x i1> splat (i1 true), <2 x i1> [[FALSEVAL:%.*]] ; CHECK-NEXT: ret <2 x i1> [[SEL]] ; %cmp.ne = icmp ne <2 x i64> %x, zeroinitializer @@ -520,7 +520,7 @@ define i1 @not_cast_ne-1(<3 x i1> %x) { define i1 @not_cast_ne-1_uses(<3 x i2> %x, ptr %p) { ; CHECK-LABEL: @not_cast_ne-1_uses( -; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i2> [[X:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i2> [[X:%.*]], splat (i2 -1) ; CHECK-NEXT: store <3 x i2> [[NOT]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x i2> [[X]] to i6 ; CHECK-NEXT: [[R:%.*]] = icmp ne i6 [[TMP1]], 0 diff --git a/llvm/test/Transforms/InstCombine/icmp-with-selects.ll b/llvm/test/Transforms/InstCombine/icmp-with-selects.ll index 9ee7c78379f5c2..8bf75d0052fec2 100644 --- a/llvm/test/Transforms/InstCombine/icmp-with-selects.ll +++ b/llvm/test/Transforms/InstCombine/icmp-with-selects.ll @@ -150,7 +150,7 @@ define <4 x i1> @fold_vector_ops(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %pa ; CHECK-SAME: (<4 x i32> [[VAL1:%.*]], <4 x i32> [[VAL2:%.*]], <4 x i32> [[PARAM:%.*]], i1 [[COND:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[VAL2]], [[VAL1]] -; CHECK-NEXT: [[CMP:%.*]] = select i1 [[COND]], <4 x i1> [[TMP0]], <4 x i1> +; CHECK-NEXT: [[CMP:%.*]] = select i1 [[COND]], <4 x i1> [[TMP0]], <4 x i1> splat (i1 true) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll index d08dca225328fe..dd07a7d452a777 100644 --- a/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll +++ b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll @@ -113,7 +113,7 @@ define i1 @sge_to_ugt(i8 %x) { define <2 x i1> @sge_to_ugt_splat(<2 x i8> %x) { ; CHECK-LABEL: @sge_to_ugt_splat( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 -114) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %a = xor <2 x i8> %x, @@ -135,7 +135,7 @@ define i1 @uge_to_sgt(i8 %x) { define <2 x i1> @uge_to_sgt_splat(<2 x i8> %x) { ; CHECK-LABEL: @uge_to_sgt_splat( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -114) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %a = xor <2 x i8> %x, @@ -157,7 +157,7 @@ define i1 @sge_to_ult(i8 %x) { define <2 x i1> @sge_to_ult_splat(<2 x i8> %x) { ; CHECK-LABEL: @sge_to_ult_splat( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 113) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %a = xor <2 x i8> %x, @@ -179,7 +179,7 @@ define i1 @uge_to_slt(i8 %x) { define <2 x i1> @uge_to_slt_splat(<2 x i8> %x) { ; CHECK-LABEL: @uge_to_slt_splat( -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 113) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %a = xor <2 x i8> %x, @@ -268,7 +268,7 @@ define i1 @slt_zero_ne_ne_0(i32 %a) { define <4 x i1> @slt_zero_eq_ne_0_vec(<4 x i32> %a) { ; CHECK-LABEL: @slt_zero_eq_ne_0_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i1> [[TMP1]] ; %cmp = icmp ne <4 x i32> %a, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index d52a0b76979373..c1b9752607c3d4 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -22,7 +22,7 @@ define i32 @test1(i32 %X) { define <2 x i32> @test1vec(<2 x i32> %X) { ; CHECK-LABEL: @test1vec( -; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[X_LOBIT]] ; %a = icmp slt <2 x i32> %X, zeroinitializer @@ -43,7 +43,7 @@ define i32 @test2(i32 %X) { define <2 x i32> @test2vec(<2 x i32> %X) { ; CHECK-LABEL: @test2vec( -; CHECK-NEXT: [[A:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[A]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[B]] ; @@ -167,7 +167,7 @@ define i1 @test9(i32 %x) { define <2 x i1> @test9_vec(<2 x i32> %x) { ; CHECK-LABEL: @test9_vec( -; CHECK-NEXT: [[B:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = add <2 x i32> %x, @@ -187,7 +187,7 @@ define i1 @test9b(i32 %x) { define <2 x i1> @test9b_vec(<2 x i32> %x) { ; CHECK-LABEL: @test9b_vec( -; CHECK-NEXT: [[B:%.*]] = icmp ult <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = add <2 x i32> %x, @@ -207,7 +207,7 @@ define i1 @test10(i32 %x) { define <2 x i1> @test10_vec(<2 x i32> %x) { ; CHECK-LABEL: @test10_vec( -; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = add <2 x i32> %x, @@ -227,7 +227,7 @@ define i1 @test10b(i32 %x) { define <2 x i1> @test10b_vec(<2 x i32> %x) { ; CHECK-LABEL: @test10b_vec( -; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i1> [[B]] ; %a = add <2 x i32> %x, @@ -246,7 +246,7 @@ define i1 @test11(i32 %x) { define <2 x i1> @test11_vec(<2 x i32> %x) { ; CHECK-LABEL: @test11_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a = add nsw <2 x i32> %x, %b = icmp slt <2 x i32> %x, %a @@ -310,7 +310,7 @@ define i1 @test17(i32 %x) { define <2 x i1> @test17vec(<2 x i32> %x) { ; CHECK-LABEL: @test17vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %shl = shl <2 x i32> , %x @@ -332,7 +332,7 @@ define i1 @test17a(i32 %x) { define <2 x i1> @test17a_vec(<2 x i32> %x) { ; CHECK-LABEL: @test17a_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %x @@ -354,7 +354,7 @@ define i1 @test18_eq(i32 %x) { define <2 x i1> @test18_eq_vec(<2 x i32> %x) { ; CHECK-LABEL: @test18_eq_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %sh = lshr <2 x i32> , %x @@ -376,7 +376,7 @@ define i1 @test18_ne(i32 %x) { define <2 x i1> @test18_ne_vec(<2 x i32> %x) { ; CHECK-LABEL: @test18_ne_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %sh = lshr <2 x i32> , %x @@ -398,7 +398,7 @@ define i1 @test19(i32 %x) { define <2 x i1> @test19vec(<2 x i32> %x) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %shl = shl <2 x i32> , %x @@ -430,7 +430,7 @@ define i1 @test20(i32 %x) { define <2 x i1> @test20vec(<2 x i32> %x) { ; CHECK-LABEL: @test20vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; %shl = shl <2 x i32> , %x @@ -452,7 +452,7 @@ define i1 @test20a(i32 %x) { define <2 x i1> @test20a_vec(<2 x i32> %x) { ; CHECK-LABEL: @test20a_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %x @@ -494,7 +494,7 @@ define i1 @test23(i32 %x) { define <2 x i1> @test23vec(<2 x i32> %x) { ; CHECK-LABEL: @test23vec( -; CHECK-NEXT: [[I4:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[I4:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 1328634634) ; CHECK-NEXT: ret <2 x i1> [[I4]] ; %i3 = sdiv <2 x i32> %x, @@ -745,7 +745,7 @@ define i1 @shr_exact(i132 %x) { define <2 x i1> @shr_exact_vec(<2 x i132> %x) { ; CHECK-LABEL: @shr_exact_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i132> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i132> [[X:%.*]], splat (i132 32) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sh = lshr exact <2 x i132> %x, @@ -1004,7 +1004,7 @@ define i1 @test_sdiv_neg_slt(i32 %x, i32 %y) { define <2 x i1> @test49(<2 x i32> %i3) { ; CHECK-LABEL: @test49( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; entry: %i11 = and <2 x i32> %i3, @@ -1137,7 +1137,7 @@ define i1 @test55(i32 %a) { define <2 x i1> @test55vec(<2 x i32> %a) { ; CHECK-LABEL: @test55vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 -123) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = sub <2 x i32> zeroinitializer, %a @@ -1157,7 +1157,7 @@ define i1 @test56(i32 %a) { define <2 x i1> @test56vec(<2 x i32> %a) { ; CHECK-LABEL: @test56vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 -113) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = sub <2 x i32> , %a @@ -1422,7 +1422,7 @@ define i1 @test67inverse(i32 %x) { define <2 x i1> @test67vec(<2 x i32> %x) { ; CHECK-LABEL: @test67vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 96) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1433,7 +1433,7 @@ define <2 x i1> @test67vec(<2 x i32> %x) { define <2 x i1> @test67vec2(<2 x i32> %x) { ; CHECK-LABEL: @test67vec2( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 96) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1444,7 +1444,7 @@ define <2 x i1> @test67vec2(<2 x i32> %x) { define <2 x i1> @test67vecinverse(<2 x i32> %x) { ; CHECK-LABEL: @test67vecinverse( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 96) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1479,7 +1479,7 @@ define i1 @test70(i32 %X) { define <2 x i1> @test70vec(<2 x i32> %X) { ; CHECK-LABEL: @test70vec( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = add <2 x i32> %X, @@ -1515,7 +1515,7 @@ define i1 @icmp_sext8trunc(i32 %x) { define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_sext8trunc_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], splat (i8 36) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %trunc = trunc <2 x i32> %x to <2 x i8> @@ -1551,7 +1551,7 @@ define i1 @icmp_shl17(i32 %x) { define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_shl16_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16> -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], splat (i16 36) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> %x, @@ -1583,7 +1583,7 @@ define i1 @icmp_shl_eq(i32 %x) { define <2 x i1> @icmp_shl_eq_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_shl_eq_vec( -; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 134217727) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[MUL_MASK]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1625,7 +1625,7 @@ define i1 @icmp_shl_ne(i32 %x) { define <2 x i1> @icmp_shl_ne_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_shl_ne_vec( -; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 33554431) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[MUL_MASK]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1636,7 +1636,7 @@ define <2 x i1> @icmp_shl_ne_vec(<2 x i32> %x) { define <2 x i1> @icmp_shl_nuw_ne_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_shl_nuw_ne_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl nuw <2 x i32> %x, @@ -1779,7 +1779,7 @@ define i1 @icmp_add20_eq_add57(i32 %x, i32 %y) { define <2 x i1> @icmp_add20_eq_add57_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_add20_eq_add57_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1791,7 +1791,7 @@ define <2 x i1> @icmp_add20_eq_add57_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_add20_eq_add57_poison(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_add20_eq_add57_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1827,7 +1827,7 @@ define i1 @icmp_sub57_ne_sub20(i32 %x, i32 %y) { define <2 x i1> @icmp_sub57_ne_sub20_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_ne_sub20_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -37) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1839,7 +1839,7 @@ define <2 x i1> @icmp_sub57_ne_sub20_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_sub57_ne_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_ne_sub20_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -37) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1851,7 +1851,7 @@ define <2 x i1> @icmp_sub57_ne_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_sub57_ne_sub20_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_ne_sub20_vec_nonsplat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1915,7 +1915,7 @@ define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) { define <2 x i1> @icmp_add20_sge_add57_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_add20_sge_add57_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1927,7 +1927,7 @@ define <2 x i1> @icmp_add20_sge_add57_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_add20_sge_add57_poison(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_add20_sge_add57_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], splat (i32 37) ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i32> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1964,7 +1964,7 @@ define i1 @icmp_sub57_sge_sub20(i32 %x, i32 %y) { define <2 x i1> @icmp_sub57_sge_sub20_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_sge_sub20_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], splat (i32 -37) ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -1976,7 +1976,7 @@ define <2 x i1> @icmp_sub57_sge_sub20_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_sub57_sge_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @icmp_sub57_sge_sub20_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], splat (i32 -37) ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -2085,8 +2085,8 @@ define i1 @icmp_add_and_shr_ne_0(i32 %X) { define <2 x i1> @icmp_add_and_shr_ne_0_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_add_and_shr_ne_0_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 240) +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 224) ; CHECK-NEXT: ret <2 x i1> [[TOBOOL]] ; %shr = lshr <2 x i32> %X, @@ -2291,7 +2291,7 @@ define i1 @icmp_shl_1_V_ult_32(i32 %V) { define <2 x i1> @icmp_shl_1_V_ult_32_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ult_32_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2311,7 +2311,7 @@ define i1 @icmp_shl_1_V_eq_32(i32 %V) { define <2 x i1> @icmp_shl_1_V_eq_32_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_eq_32_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2331,7 +2331,7 @@ define i1 @icmp_shl_1_V_ult_30(i32 %V) { define <2 x i1> @icmp_shl_1_V_ult_30_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ult_30_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2351,7 +2351,7 @@ define i1 @icmp_shl_1_V_ugt_30(i32 %V) { define <2 x i1> @icmp_shl_1_V_ugt_30_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ugt_30_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2371,7 +2371,7 @@ define i1 @icmp_shl_1_V_ule_30(i32 %V) { define <2 x i1> @icmp_shl_1_V_ule_30_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ule_30_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2391,7 +2391,7 @@ define i1 @icmp_shl_1_V_uge_30(i32 %V) { define <2 x i1> @icmp_shl_1_V_uge_30_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_uge_30_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2411,7 +2411,7 @@ define i1 @icmp_shl_1_V_uge_2147483648(i32 %V) { define <2 x i1> @icmp_shl_1_V_uge_2147483648_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_uge_2147483648_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2431,7 +2431,7 @@ define i1 @icmp_shl_1_V_ult_2147483648(i32 %V) { define <2 x i1> @icmp_shl_1_V_ult_2147483648_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2451,7 +2451,7 @@ define i1 @icmp_shl_1_V_sle_0(i32 %V) { define <2 x i1> @icmp_shl_1_V_sle_0_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_sle_0_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2471,7 +2471,7 @@ define i1 @icmp_shl_1_V_sle_negative(i32 %V) { define <2 x i1> @icmp_shl_1_V_sle_0_negative(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_sle_0_negative( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2491,7 +2491,7 @@ define i1 @icmp_shl_1_V_sgt_0(i32 %V) { define <2 x i1> @icmp_shl_1_V_sgt_0_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_sgt_0_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2511,7 +2511,7 @@ define i1 @icmp_shl_1_V_sgt_negative(i32 %V) { define <2 x i1> @icmp_shl_1_V_sgt_negative_vec(<2 x i32> %V) { ; CHECK-LABEL: @icmp_shl_1_V_sgt_negative_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %V @@ -2546,7 +2546,7 @@ define i1 @or_icmp_eq_B_0_icmp_ult_A_B_logical(i64 %a, i64 %b) { define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_uniform(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; @@ -2558,7 +2558,7 @@ define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_uniform(<2 x i64> %a, <2 x i64> %b) define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_poison(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; @@ -2792,7 +2792,7 @@ define i1 @and_icmp_ne_B_0_icmp_uge_A_B_logical(i64 %a, i64 %b) { define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_uniform(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @and_icmp_ne_B_0_icmp_uge_A_B_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; @@ -2804,7 +2804,7 @@ define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_uniform(<2 x i64> %a, <2 x i64> %b define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_poison(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @and_icmp_ne_B_0_icmp_uge_A_B_poison( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; @@ -2827,8 +2827,8 @@ define i1 @icmp_add_ult_2(i32 %X) { define <2 x i1> @icmp_add_X_-14_ult_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_add_X_-14_ult_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 14) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = add <2 x i32> %X, @@ -2849,8 +2849,8 @@ define i1 @icmp_sub_3_X_ult_2(i32 %X) { define <2 x i1> @icmp_sub_3_X_ult_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_sub_3_X_ult_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = sub <2 x i32> , %X @@ -2871,8 +2871,8 @@ define i1 @icmp_add_X_-14_uge_2(i32 %X) { define <2 x i1> @icmp_add_X_-14_uge_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_add_X_-14_uge_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 14) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = add <2 x i32> %X, @@ -2893,8 +2893,8 @@ define i1 @icmp_sub_3_X_uge_2(i32 %X) { define <2 x i1> @icmp_sub_3_X_uge_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_sub_3_X_uge_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = sub <2 x i32> , %X @@ -2914,7 +2914,7 @@ define i1 @icmp_and_X_-16_eq-16(i32 %X) { define <2 x i1> @icmp_and_X_-16_eq-16_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_and_X_-16_eq-16_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 -17) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %and = and <2 x i32> %X, @@ -2934,7 +2934,7 @@ define i1 @icmp_and_X_-16_ne-16(i32 %X) { define <2 x i1> @icmp_and_X_-16_ne-16_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_and_X_-16_ne-16_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 -16) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %and = and <2 x i32> %X, @@ -2959,7 +2959,7 @@ define i1 @or1_eq1(i32 %x) { define <2 x i1> @or3_eq3_vec(<2 x i8> %x) { ; CHECK-LABEL: @or3_eq3_vec( -; CHECK-NEXT: [[T1:%.*]] = icmp ult <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[T1]] ; %t0 = or <2 x i8> %x, @@ -2983,7 +2983,7 @@ define i1 @or7_ne7(i32 %x) { define <2 x i1> @or63_ne63_vec(<2 x i8> %x) { ; CHECK-LABEL: @or63_ne63_vec( -; CHECK-NEXT: [[T1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 63) ; CHECK-NEXT: ret <2 x i1> [[T1]] ; %t0 = or <2 x i8> %x, @@ -3009,7 +3009,7 @@ define i1 @orC_eqC(i32 %x) { define <2 x i1> @orC_eqC_vec(<2 x i8> %x) { ; CHECK-LABEL: @orC_eqC_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -44) ; CHECK-NEXT: [[T1:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T1]] ; @@ -3035,7 +3035,7 @@ define i1 @orC_neC(i32 %x) { define <2 x i1> @orC_neC_vec(<2 x i8> %x) { ; CHECK-LABEL: @orC_neC_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 42) ; CHECK-NEXT: [[T1:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T1]] ; @@ -3057,8 +3057,8 @@ define i1 @shrink_constant(i32 %X) { define <2 x i1> @shrink_constant_vec(<2 x i32> %X) { ; CHECK-LABEL: @shrink_constant_vec( -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[XOR]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 -12) +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[XOR]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %xor = xor <2 x i32> %X, @@ -3079,7 +3079,7 @@ define i1 @icmp_sub_-1_X_ult_4(i32 %X) { define <2 x i1> @icmp_xor_neg4_X_ult_4_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_xor_neg4_X_ult_4_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 -5) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %xor = xor <2 x i32> %X, @@ -3099,7 +3099,7 @@ define i1 @icmp_sub_-1_X_uge_4(i32 %X) { define <2 x i1> @icmp_xor_neg4_X_uge_4_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_xor_neg4_X_uge_4_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 -4) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %xor = xor <2 x i32> %X, @@ -3109,7 +3109,7 @@ define <2 x i1> @icmp_xor_neg4_X_uge_4_vec(<2 x i32> %X) { define <2 x i1> @xor_ult(<2 x i8> %x) { ; CHECK-LABEL: @xor_ult( -; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %xor = xor <2 x i8> %x, @@ -3132,7 +3132,7 @@ define i1 @xor_ult_extra_use(i8 %x, ptr %p) { define <2 x i1> @xor_ugt(<2 x i8> %x) { ; CHECK-LABEL: @xor_ugt( -; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %xor = xor <2 x i8> %x, @@ -3233,7 +3233,7 @@ define <2 x i1> @icmp_and_or_lshr_vec(<2 x i32> %x, <2 x i32> %y) { define <2 x i1> @icmp_and_or_lshr_vec_commute(<2 x i32> %xp, <2 x i32> %y) { ; CHECK-LABEL: @icmp_and_or_lshr_vec_commute( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42) ; CHECK-NEXT: [[SHF:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X]], [[SHF]] ; CHECK-NEXT: [[RET:%.*]] = trunc <2 x i32> [[OR]] to <2 x i1> @@ -3262,7 +3262,7 @@ define i1 @icmp_and_or_lshr_cst(i32 %x) { define <2 x i1> @icmp_and_or_lshr_cst_vec(<2 x i32> %x) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -3301,8 +3301,8 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_poison(<2 x i32> %x) { define <2 x i1> @icmp_and_or_lshr_cst_vec_commute(<2 x i32> %xp) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_commute( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], splat (i32 3) ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -3316,7 +3316,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_commute(<2 x i32> %xp) { define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_nonuniform_commute( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] @@ -3331,7 +3331,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) { define <2 x i1> @icmp_and_or_lshr_cst_vec_poison_commute(<2 x i32> %xp) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_poison_commute( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] @@ -3356,7 +3356,7 @@ define i1 @shl_ap1_zero_ap2_non_zero_2(i32 %a) { define <2 x i1> @shl_ap1_zero_ap2_non_zero_2_vec(<2 x i32> %a) { ; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 29) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i32> , %a @@ -3719,8 +3719,8 @@ define i1 @cmp_inverse_mask_bits_set_eq(i32 %x) { define <2 x i1> @cmp_inverse_mask_bits_set_eq_vec(<2 x i32> %x) { ; CHECK-LABEL: @cmp_inverse_mask_bits_set_eq_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -43) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 -43) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %or = or <2 x i32> %x, @@ -3790,8 +3790,8 @@ define i1 @ugtKnownBits(i8 %a) { define <2 x i1> @ugtKnownBitsVec(<2 x i8> %a) { ; CHECK-LABEL: @ugtKnownBitsVec( -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 17) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[B]], splat (i8 17) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %b = and <2 x i8> %a, @@ -4069,9 +4069,9 @@ define i1 @knownbits3(i8 %a, i8 %b) { define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @knownbits4( -; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], +; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 1) +; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 2) +; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], splat (i8 1) ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], [[A1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -4119,9 +4119,9 @@ define i1 @knownbits6(i8 %a, i8 %b) { define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @knownbits7( -; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], +; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 -127) +; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 2) +; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], splat (i8 1) ; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[TMP1]], [[A1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -4240,7 +4240,7 @@ define i1 @signbit_bitcast_fptrunc(float %x) { define <2 x i1> @signbit_bitcast_fptrunc_vec(<2 x double> %x) { ; CHECK-LABEL: @signbit_bitcast_fptrunc_vec( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i64> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %f = fptrunc <2 x double> %x to <2 x half> @@ -4266,7 +4266,7 @@ define <4 x i1> @signbit_bitcast_fpext_vec_wrong_bitcast(<2 x half> %x) { ; CHECK-LABEL: @signbit_bitcast_fpext_vec_wrong_bitcast( ; CHECK-NEXT: [[F:%.*]] = fpext <2 x half> [[X:%.*]] to <2 x float> ; CHECK-NEXT: [[B:%.*]] = bitcast <2 x float> [[F]] to <4 x i16> -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i16> [[B]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i16> [[B]], splat (i16 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %f = fpext <2 x half> %x to <2 x float> @@ -4536,9 +4536,9 @@ define i1 @redundant_sign_bit_count_i8(i8 %x) { define <2 x i1> @redundant_sign_bit_count_ult_31_30_vector(<2 x i32> %xsrc) { ; CHECK-LABEL: @redundant_sign_bit_count_ult_31_30_vector( -; CHECK-NEXT: [[X:%.*]] = mul <2 x i32> [[XSRC:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X]], -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[TMP1]], +; CHECK-NEXT: [[X:%.*]] = mul <2 x i32> [[XSRC:%.*]], splat (i32 13) +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X]], splat (i32 1073741824) +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %x = mul <2 x i32> %xsrc, ; thwart complexity-based canonicalization @@ -4612,10 +4612,10 @@ define i1 @zext_bool_and_eq0(i1 %x, i8 %y) { define <2 x i1> @zext_bool_and_eq0_commute(<2 x i1> %x, <2 x i8> %p) { ; CHECK-LABEL: @zext_bool_and_eq0_commute( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], splat (i8 1) ; CHECK-NEXT: [[R1:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[NOT_X:%.*]] = xor <2 x i1> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_X]], <2 x i1> , <2 x i1> [[R1]] +; CHECK-NEXT: [[NOT_X:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_X]], <2 x i1> splat (i1 true), <2 x i1> [[R1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %y = mul <2 x i8> %p, %p ; thwart complexity-based canonicalization @@ -4742,7 +4742,7 @@ define i1 @or_positive_sgt_zero(i8 %a) { define <2 x i1> @or_postive_sgt_zero_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sgt_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4773,7 +4773,7 @@ define i1 @or_positive_sge_zero(i8 %a) { define <2 x i1> @or_postive_sge_zero_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sge_zero_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4804,7 +4804,7 @@ define i1 @or_positive_sge_postive(i8 %a) { define <2 x i1> @or_postive_sge_positive_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sge_positive_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4930,7 +4930,7 @@ define i1 @or_positive_sgt_neg(i8 %a) { define <2 x i1> @or_postive_sgt_neg_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sgt_neg_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4952,8 +4952,8 @@ define i1 @mul_or_positive_sge_neg(i8 %a) { define <2 x i1> @or_postive_sge_neg_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_sge_neg_vec( -; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24) +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], splat (i8 -2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -4975,8 +4975,8 @@ define i1 @mul_or_small_sge_large(i8 %a) { define <2 x i1> @or_small_sge_large_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_small_sge_large_vec( -; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24) +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], splat (i8 24) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -5019,8 +5019,8 @@ define i1 @or_positive_slt_neg(i8 %a) { define <2 x i1> @or_postive_slt_neg_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_postive_slt_neg_vec( -; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24) +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[B]], splat (i8 -1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -5042,8 +5042,8 @@ define i1 @or_small_slt_large(i8 %a) { define <2 x i1> @or_small_slt_large_vec(<2 x i8> %a) { ; CHECK-LABEL: @or_small_slt_large_vec( -; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[B]], +; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24) +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[B]], splat (i8 25) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; @@ -5303,7 +5303,7 @@ define i1 @test_icmp_shl_nsw_i31(i31 %x) { define <2 x i1> @test_icmp_shl_vec(<2 x i64> %x) { ; CHECK-LABEL: @test_icmp_shl_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 3) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %shl = shl <2 x i64> %x, splat(i64 32) diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll index 6cbb2a246f5a4a..8bc915e695aa75 100644 --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll @@ -725,7 +725,7 @@ define <4 x float> @insert_demanded_element_op1(<4 x float> %x, <4 x float> %y) define <4 x float> @splat_constant(<4 x float> %x) { ; CHECK-LABEL: @splat_constant( ; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[X:%.*]], float 3.000000e+00, i64 3 -; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[INS3]], +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[INS3]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <4 x float> [[R]] ; %ins3 = insertelement <4 x float> %x, float 3.0, i32 3 diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll index c87e2e8596c62d..f51e444a815c80 100644 --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -781,7 +781,7 @@ define <5 x float> @insert_demanded_element_unequal_length_op1(<4 x float> %x, < define <4 x float> @splat_constant(<4 x float> %x) { ; CHECK-LABEL: @splat_constant( ; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x float> [[X:%.*]], float 3.000000e+00, i64 3 -; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[INS3]], +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[INS3]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <4 x float> [[R]] ; %ins3 = insertelement <4 x float> %x, float 3.0, i32 3 diff --git a/llvm/test/Transforms/InstCombine/insertelement.ll b/llvm/test/Transforms/InstCombine/insertelement.ll index c8df2db6e70caa..72dcd929ba0cb8 100644 --- a/llvm/test/Transforms/InstCombine/insertelement.ll +++ b/llvm/test/Transforms/InstCombine/insertelement.ll @@ -15,7 +15,7 @@ define <4 x i32> @insert_known_idx(<4 x i32> %x) { define <4 x i32> @insert_unknown_idx(<4 x i32> %x, i32 %idx) { ; CHECK-LABEL: @insert_unknown_idx( -; CHECK-NEXT: [[V1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[V1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 6, i32 [[IDX:%.*]] ; CHECK-NEXT: ret <4 x i32> [[V2]] ; @@ -27,7 +27,7 @@ define <4 x i32> @insert_unknown_idx(<4 x i32> %x, i32 %idx) { define <2 x i8> @insert_known_any_idx(<2 x i8> %xx, i8 %yy, i32 %idx) { ; CHECK-LABEL: @insert_known_any_idx( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 16) ; %x = or <2 x i8> %xx, %y = or i8 %yy, 16 @@ -42,7 +42,7 @@ define <2 x i8> @insert_known_any_idx_fail1(<2 x i8> %xx, i8 %yy, i32 %idx) { ; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], ; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], 16 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[INS]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[INS]], splat (i8 16) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x = or <2 x i8> %xx, @@ -59,7 +59,7 @@ define <2 x i8> @insert_known_any_idx_fail2(<2 x i8> %xx, i8 %yy, i32 %idx) { ; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], ; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], 15 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[INS]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[INS]], splat (i8 16) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x = or <2 x i8> %xx, diff --git a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll index afd56abf40a507..4d8993dfb33159 100644 --- a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll +++ b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll @@ -74,8 +74,8 @@ define i8 @t3_commutative(i8 %x) { ; Basic splat vector test define <2 x i8> @t4_splat(<2 x i8> %x) { ; CHECK-LABEL: @t4_splat( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -89,8 +89,8 @@ define <2 x i8> @t4_splat(<2 x i8> %x) { ; Splat-with-poison define <2 x i8> @t5_splat_poison_0b0001(<2 x i8> %x) { ; CHECK-LABEL: @t5_splat_poison_0b0001( -; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], +; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -102,8 +102,8 @@ define <2 x i8> @t5_splat_poison_0b0001(<2 x i8> %x) { } define <2 x i8> @t5_splat_poison_0b0010(<2 x i8> %x) { ; CHECK-LABEL: @t5_splat_poison_0b0010( -; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], +; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -115,8 +115,8 @@ define <2 x i8> @t5_splat_poison_0b0010(<2 x i8> %x) { } define <2 x i8> @t5_splat_poison_0b0100(<2 x i8> %x) { ; CHECK-LABEL: @t5_splat_poison_0b0100( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -128,8 +128,8 @@ define <2 x i8> @t5_splat_poison_0b0100(<2 x i8> %x) { } define <2 x i8> @t5_splat_poison_0b1000(<2 x i8> %x) { ; CHECK-LABEL: @t5_splat_poison_0b1000( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15) +; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16) ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, @@ -161,10 +161,10 @@ define <2 x i8> @t6_nonsplat(<2 x i8> %x) { ; Even if the alignment (and masks) are splat, the bias could be non-splat define <2 x i8> @t7_nonsplat_bias(<2 x i8> %x) { ; CHECK-LABEL: @t7_nonsplat_bias( -; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 15) ; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X]], -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16) ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]] ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; @@ -441,9 +441,9 @@ define i8 @t17_oneuse(i8 %x) { ; more poisonous. define <2 x i4> @t18_replacement_0b0001(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b0001( -; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 3) ; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i4> [[X_LOWBITS]], zeroinitializer -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X]], splat (i4 3) ; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i4> [[X]], <2 x i4> [[X_BIASED_HIGHBITS]] @@ -461,10 +461,10 @@ define <2 x i4> @t18_replacement_0b0001(<2 x i4> %x) { ; more poisonous. define <2 x i4> @t18_replacement_0b0010(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b0010( -; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 3) ; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i4> [[X_LOWBITS]], zeroinitializer ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X]], -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], splat (i4 -4) ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i4> [[X]], <2 x i4> [[X_BIASED_HIGHBITS]] ; CHECK-NEXT: ret <2 x i4> [[X_ROUNDEDUP]] @@ -479,8 +479,8 @@ define <2 x i4> @t18_replacement_0b0010(<2 x i4> %x) { } define <2 x i4> @t18_replacement_0b0100(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b0100( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], splat (i4 3) +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], splat (i4 -4) ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]] ; @@ -494,8 +494,8 @@ define <2 x i4> @t18_replacement_0b0100(<2 x i4> %x) { } define <2 x i4> @t18_replacement_0b1000(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b1000( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], splat (i4 3) +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], splat (i4 -4) ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]] ; diff --git a/llvm/test/Transforms/InstCombine/intrinsic-select.ll b/llvm/test/Transforms/InstCombine/intrinsic-select.ll index f110d7765830ef..4ce2908a630785 100644 --- a/llvm/test/Transforms/InstCombine/intrinsic-select.ll +++ b/llvm/test/Transforms/InstCombine/intrinsic-select.ll @@ -297,7 +297,7 @@ define double @test_fabs_select1(double %a) { define <2 x double> @test_fabs_select1_vec(<2 x double> %a) { ; CHECK-LABEL: @test_fabs_select1_vec( ; CHECK-NEXT: [[COND:%.*]] = fcmp uno <2 x double> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[COND]], <2 x double> , <2 x double> [[A]] +; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[COND]], <2 x double> splat (double 0x7FF8000000000000), <2 x double> [[A]] ; CHECK-NEXT: ret <2 x double> [[SEL2]] ; %cond = fcmp uno <2 x double> %a, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/intrinsics.ll b/llvm/test/Transforms/InstCombine/intrinsics.ll index c8d70e17cd3921..9dceb419215e55 100644 --- a/llvm/test/Transforms/InstCombine/intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/intrinsics.ll @@ -62,7 +62,7 @@ define i32 @cttz(i32 %a) { define <2 x i32> @cttz_vec(<2 x i32> %a) { ; CHECK-LABEL: @cttz_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 3) ; %or = or <2 x i32> %a, %and = and <2 x i32> %or, @@ -90,7 +90,7 @@ define i1 @cttz_i1_zero_is_poison(i1 %arg) { define <2 x i1> @cttz_v2i1(<2 x i1> %arg) { ; CHECK-LABEL: @cttz_v2i1( -; CHECK-NEXT: [[CNT:%.*]] = xor <2 x i1> [[ARG:%.*]], +; CHECK-NEXT: [[CNT:%.*]] = xor <2 x i1> [[ARG:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[CNT]] ; %cnt = call <2 x i1> @llvm.cttz.v2i1(<2 x i1> %arg, i1 false) nounwind readnone @@ -149,7 +149,7 @@ define i32 @cttz_knownbits2(i32 %arg) { define <2 x i32> @cttz_knownbits2_vec(<2 x i32> %arg) { ; CHECK-LABEL: @cttz_knownbits2_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], splat (i32 4) ; CHECK-NEXT: [[CNT:%.*]] = call range(i32 0, 3) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true) #[[ATTR2]] ; CHECK-NEXT: ret <2 x i32> [[CNT]] ; @@ -190,7 +190,7 @@ define i8 @ctlz(i8 %a) { define <2 x i8> @ctlz_vec(<2 x i8> %a) { ; CHECK-LABEL: @ctlz_vec( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 2) ; %or = or <2 x i8> %a, %and = and <2 x i8> %or, @@ -218,7 +218,7 @@ define i1 @ctlz_i1_zero_is_poison(i1 %arg) { define <2 x i1> @ctlz_v2i1(<2 x i1> %arg) { ; CHECK-LABEL: @ctlz_v2i1( -; CHECK-NEXT: [[CNT:%.*]] = xor <2 x i1> [[ARG:%.*]], +; CHECK-NEXT: [[CNT:%.*]] = xor <2 x i1> [[ARG:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[CNT]] ; %cnt = call <2 x i1> @llvm.ctlz.v2i1(<2 x i1> %arg, i1 false) nounwind readnone @@ -266,7 +266,7 @@ define i8 @ctlz_knownbits2(i8 %arg) { define <2 x i8> @ctlz_knownbits2_vec(<2 x i8> %arg) { ; CHECK-LABEL: @ctlz_knownbits2_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], splat (i8 32) ; CHECK-NEXT: [[CNT:%.*]] = call range(i8 0, 3) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true) #[[ATTR2]] ; CHECK-NEXT: ret <2 x i8> [[CNT]] ; @@ -324,7 +324,7 @@ define i32 @ctlz_no_zero(i32 %a) { define <2 x i32> @ctlz_no_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_no_zero_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 8) ; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 29) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[OR]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] ; @@ -362,7 +362,7 @@ define i32 @cttz_no_zero(i32 %a) { define <2 x i32> @cttz_no_zero_vec(<2 x i32> %a) { ; CHECK-LABEL: @cttz_no_zero_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 8) ; CHECK-NEXT: [[CTTZ:%.*]] = tail call range(i32 0, 4) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[CTTZ]] ; diff --git a/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll index 0440199dadb873..c73b07cfbc6681 100644 --- a/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll +++ b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll @@ -52,7 +52,7 @@ define <2 x i4> @in_constant_varx_mone_invmask(<2 x i4> %x, <2 x i4> %mask) { define <2 x i4> @in_constant_varx_6_invmask(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_6_invmask( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], splat (i4 6) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -94,7 +94,7 @@ define <3 x i4> @in_constant_varx_6_invmask_poison(<3 x i4> %x, <3 x i4> %mask) define <2 x i4> @in_constant_mone_vary_invmask(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_mone_vary_invmask( -; CHECK-NEXT: [[MASK_NOT:%.*]] = xor <2 x i4> [[MASK:%.*]], +; CHECK-NEXT: [[MASK_NOT:%.*]] = xor <2 x i4> [[MASK:%.*]], splat (i4 -1) ; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[Y:%.*]], [[MASK_NOT]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -107,9 +107,9 @@ define <2 x i4> @in_constant_mone_vary_invmask(<2 x i4> %y, <2 x i4> %mask) { define <2 x i4> @in_constant_6_vary_invmask(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_6_vary_invmask( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], splat (i4 6) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 6) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %notmask = xor <2 x i4> %mask, @@ -268,7 +268,7 @@ define <2 x i4> @c_1_1_1 (<2 x i4> %m) { define <2 x i4> @commutativity_constant_varx_6_invmask(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @commutativity_constant_varx_6_invmask( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], splat (i4 6) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -282,9 +282,9 @@ define <2 x i4> @commutativity_constant_varx_6_invmask(<2 x i4> %x, <2 x i4> %ma define <2 x i4> @commutativity_constant_6_vary_invmask(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @commutativity_constant_6_vary_invmask( -; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], splat (i4 6) ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 6) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %notmask = xor <2 x i4> %mask, @@ -320,7 +320,7 @@ define <2 x i4> @n_oneuse_D_is_ok (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @n_oneuse_A( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], splat (i4 -1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] @@ -337,7 +337,7 @@ define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { define <2 x i4> @n_oneuse_AD (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @n_oneuse_AD( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], splat (i4 -1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] @@ -358,7 +358,7 @@ define <2 x i4> @n_oneuse_AD (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { define <2 x i4> @n_third_var (<2 x i4> %x, <2 x i4> %y, <2 x i4> %z, <2 x i4> %m) { ; CHECK-LABEL: @n_third_var( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], splat (i4 -1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Z:%.*]] @@ -374,7 +374,7 @@ define <2 x i4> @n_third_var (<2 x i4> %x, <2 x i4> %y, <2 x i4> %z, <2 x i4> %m define <2 x i4> @n_third_var_const(<2 x i4> %x, <2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @n_third_var_const( -; CHECK-NEXT: [[NOTMASK:%.*]] = xor <2 x i4> [[MASK:%.*]], +; CHECK-NEXT: [[NOTMASK:%.*]] = xor <2 x i4> [[MASK:%.*]], splat (i4 -1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[NOTMASK]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], @@ -391,7 +391,7 @@ define <2 x i4> @n_third_var_const(<2 x i4> %x, <2 x i4> %y, <2 x i4> %mask) { define <2 x i4> @n_badxor_splat (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ; CHECK-LABEL: @n_badxor_splat( -; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], +; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], splat (i4 1) ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] diff --git a/llvm/test/Transforms/InstCombine/is_fpclass.ll b/llvm/test/Transforms/InstCombine/is_fpclass.ll index 24a331dfb48bb0..c1809b8bec61cb 100644 --- a/llvm/test/Transforms/InstCombine/is_fpclass.ll +++ b/llvm/test/Transforms/InstCombine/is_fpclass.ll @@ -504,7 +504,7 @@ define i1 @test_class_is_pinf_or_nan_f32(float %x) { define <2 x i1> @test_class_is_pinf_v2f32(<2 x float> %x) { ; CHECK-LABEL: @test_class_is_pinf_v2f32( -; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], +; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], splat (float 0x7FF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[VAL]] ; %val = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 512) ; fcPosInf @@ -531,7 +531,7 @@ define i1 @test_class_is_ninf_or_nan_f32(float %x) { define <2 x i1> @test_class_is_ninf_v2f32(<2 x float> %x) { ; CHECK-LABEL: @test_class_is_ninf_v2f32( -; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], +; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], splat (float 0xFFF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[VAL]] ; %val = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 4) ; fcNegInf @@ -551,7 +551,7 @@ define i1 @test_class_is_inf_f32(float %x) { define <2 x i1> @test_class_is_inf_v2f32(<2 x float> %x) { ; CHECK-LABEL: @test_class_is_inf_v2f32( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]]) -; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[TMP1]], +; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq <2 x float> [[TMP1]], splat (float 0x7FF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[VAL]] ; %val = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 516) ; fcInf @@ -1300,7 +1300,7 @@ define i1 @test_no_fold_and_class_f32_0(float %a, float %b) { define <2 x i1> @test_fold_and_class_v2f32(<2 x float> %a) { ; CHECK-LABEL: @test_fold_and_class_v2f32( -; CHECK-NEXT: [[CLASS1:%.*]] = fcmp ueq <2 x float> [[A:%.*]], +; CHECK-NEXT: [[CLASS1:%.*]] = fcmp ueq <2 x float> [[A:%.*]], splat (float 0xFFF0000000000000) ; CHECK-NEXT: ret <2 x i1> [[CLASS1]] ; %class0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 7) diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll index 7ace998556c703..df697e686d986c 100644 --- a/llvm/test/Transforms/InstCombine/ispow2.ll +++ b/llvm/test/Transforms/InstCombine/ispow2.ll @@ -16,7 +16,7 @@ define i1 @is_pow2or0_negate_op(i32 %x) { define <2 x i1> @is_pow2or0_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @is_pow2or0_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -40,7 +40,7 @@ define i1 @is_pow2or0_decrement_op(i8 %x) { define <2 x i1> @is_pow2or0_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @is_pow2or0_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i8> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult <2 x i8> [[TMP1]], splat (i8 2) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %dec = add <2 x i8> %x, @@ -64,7 +64,7 @@ define i1 @isnot_pow2or0_negate_op(i32 %x) { define <2 x i1> @isnot_pow2or0_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @isnot_pow2or0_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt <2 x i32> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -88,7 +88,7 @@ define i1 @isnot_pow2or0_decrement_op(i8 %x) { define <2 x i1> @isnot_pow2or0_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2or0_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ugt <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %dec = add <2 x i8> %x, @@ -312,7 +312,7 @@ define i1 @is_pow2_ctpop_extra_uses_logical(i32 %x) { define <2 x i1> @is_pow2_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @is_pow2_ctpop_commute_vec( ; CHECK-NEXT: [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[T0]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[T0]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) @@ -519,7 +519,7 @@ define i1 @isnot_pow2_ctpop_extra_uses_logical(i32 %x) { define <2 x i1> @isnot_pow2_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2_ctpop_commute_vec( ; CHECK-NEXT: [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[T0]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[T0]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) @@ -656,7 +656,7 @@ define i1 @is_pow2_negate_op_logical(i32 %x) { define <2 x i1> @is_pow2_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @is_pow2_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -698,7 +698,7 @@ define i1 @is_pow2_decrement_op_logical(i8 %x) { define <2 x i1> @is_pow2_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @is_pow2_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %dec = add <2 x i8> %x, @@ -740,7 +740,7 @@ define i1 @isnot_pow2_negate_op_logical(i32 %x) { define <2 x i1> @isnot_pow2_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @isnot_pow2_negate_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -782,7 +782,7 @@ define i1 @isnot_pow2_decrement_op_logical(i8 %x) { define <2 x i1> @isnot_pow2_decrement_op_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2_decrement_op_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %dec = add <2 x i8> %x, @@ -837,7 +837,7 @@ define i1 @is_pow2or0_ctpop_logical(i32 %x) { define <2 x i1> @is_pow2or0_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @is_pow2or0_ctpop_commute_vec( ; CHECK-NEXT: [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp samesign ult <2 x i8> [[T0]], +; CHECK-NEXT: [[R:%.*]] = icmp samesign ult <2 x i8> [[T0]], splat (i8 2) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) @@ -1030,7 +1030,7 @@ define i1 @isnot_pow2nor0_ctpop_logical(i32 %x) { define <2 x i1> @isnot_pow2nor0_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2nor0_ctpop_commute_vec( ; CHECK-NEXT: [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i8> [[T0]], +; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i8> [[T0]], splat (i8 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) @@ -1170,7 +1170,7 @@ define i1 @isnot_pow2nor0_ctpop_wrong_pred2_logical(i32 %x) { define <2 x i1> @isnot_pow2nor0_wrong_pred3_ctpop_commute_vec(<2 x i8> %x) { ; CHECK-LABEL: @isnot_pow2nor0_wrong_pred3_ctpop_commute_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x) %cmp = icmp ne <2 x i8> %t0, @@ -1492,7 +1492,7 @@ define i1 @is_pow2_or_z_known_bits(i32 %xin) { define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) { ; CHECK-LABEL: @not_pow2_or_z_known_bits( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], splat (i32 -65) ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -1504,9 +1504,9 @@ define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) { define <2 x i1> @not_pow2_or_z_known_bits_fail_wrong_cmp(<2 x i32> %xin) { ; CHECK-LABEL: @not_pow2_or_z_known_bits_fail_wrong_cmp( -; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[XIN:%.*]], +; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[XIN:%.*]], splat (i32 64) ; CHECK-NEXT: [[CNT:%.*]] = call range(i32 1, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X]]) -; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i32> [[CNT]], +; CHECK-NEXT: [[R:%.*]] = icmp samesign ugt <2 x i32> [[CNT]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %x = or <2 x i32> %xin, diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index aa24ec3905f133..c9e7cbae3d9395 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -939,10 +939,10 @@ define i1 @extract_value_uadd_fail2(<2 x i8> %xx, <2 x i8> %yy, i32 %idx) { define i1 @extract_value_uadd_fail3(<2 x i8> %xx, <2 x i8> %yy) { ; CHECK-LABEL: @extract_value_uadd_fail3( -; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], -; CHECK-NEXT: [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], -; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], -; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], +; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], splat (i8 127) +; CHECK-NEXT: [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], splat (i8 127) +; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[X0]], splat (i8 1) +; CHECK-NEXT: [[Y:%.*]] = add nuw <2 x i8> [[Y0]], splat (i8 1) ; CHECK-NEXT: [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]]) ; CHECK-NEXT: [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0 ; CHECK-NEXT: [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1 diff --git a/llvm/test/Transforms/InstCombine/ldexp-ext.ll b/llvm/test/Transforms/InstCombine/ldexp-ext.ll index 58710005d6cce0..3fac72a7858e48 100644 --- a/llvm/test/Transforms/InstCombine/ldexp-ext.ll +++ b/llvm/test/Transforms/InstCombine/ldexp-ext.ll @@ -47,7 +47,7 @@ define double @ldexp_zext_double_fast_math(double %x, i1 %bool) { define <2 x float> @ldexp_zext_float_vector(<2 x float> %x, <2 x i1> %bool) { ; CHECK-LABEL: @ldexp_zext_float_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> , <2 x float> +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> splat (float 2.000000e+00), <2 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[LDEXP:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x float> [[LDEXP]] ; @@ -102,7 +102,7 @@ define double @ldexp_sext_double_fast_math(double %x, i1 %bool) { define <2 x float> @ldexp_sext_float_vector(<2 x float> %x, <2 x i1> %bool) { ; CHECK-LABEL: @ldexp_sext_float_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> , <2 x float> +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> splat (float 5.000000e-01), <2 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[LDEXP:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x float> [[LDEXP]] ; diff --git a/llvm/test/Transforms/InstCombine/ldexp.ll b/llvm/test/Transforms/InstCombine/ldexp.ll index 7ae483cbda0e22..8908d476b4a2c0 100644 --- a/llvm/test/Transforms/InstCombine/ldexp.ll +++ b/llvm/test/Transforms/InstCombine/ldexp.ll @@ -830,7 +830,7 @@ define float @ldexp_127(float %x) { define <2 x float> @ldexp_3_vector(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @ldexp_3_vector ; CHECK-SAME: (<2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[LDEXP:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: [[LDEXP:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> splat (i32 3)) ; CHECK-NEXT: ret <2 x float> [[LDEXP]] ; %ldexp = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> ) diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll index dbc68044c11a0d..b3fa3dae80379f 100644 --- a/llvm/test/Transforms/InstCombine/load-store-forward.ll +++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll @@ -347,7 +347,7 @@ define i1 @load_after_memset_1_i1(ptr %a) { define <4 x i8> @load_after_memset_1_vec(ptr %a) { ; CHECK-LABEL: @load_after_memset_1_vec( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 1) ; call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) %v = load <4 x i8>, ptr %a diff --git a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll index 58a9c8163c077b..918ea605a10bfd 100644 --- a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll +++ b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll @@ -21,8 +21,8 @@ define void @combine_masked_load_store_from_constant_array(ptr %ptr) { define void @combine_masked_expandload_compressstore_from_constant_array(ptr %ptr) { ; CHECK-LABEL: @combine_masked_expandload_compressstore_from_constant_array( -; CHECK-NEXT: [[TMP1:%.*]] = call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull @contant_int_array, <10 x i1> , <10 x i64> zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.compressstore.v10i64(<10 x i64> [[TMP1]], ptr [[PTR:%.*]], <10 x i1> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull @contant_int_array, <10 x i1> splat (i1 true), <10 x i64> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.compressstore.v10i64(<10 x i64> [[TMP1]], ptr [[PTR:%.*]], <10 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; %1 = alloca [10 x i64] diff --git a/llvm/test/Transforms/InstCombine/log-pow.ll b/llvm/test/Transforms/InstCombine/log-pow.ll index bfa636e470bb45..374115953145d8 100644 --- a/llvm/test/Transforms/InstCombine/log-pow.ll +++ b/llvm/test/Transforms/InstCombine/log-pow.ll @@ -137,7 +137,7 @@ define double @log10_exp(double %x) { define <2 x float> @logv_exp2v(<2 x float> %x) { ; CHECK-LABEL: @logv_exp2v( -; CHECK-NEXT: [[MUL:%.*]] = fmul fast <2 x float> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 0x3FE62E4300000000) ; CHECK-NEXT: ret <2 x float> [[MUL]] ; %exp = call fast <2 x float> @llvm.exp2.v2f32(<2 x float> %x) diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll index cf0dc350328846..9e0c98bb340068 100644 --- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll @@ -345,7 +345,7 @@ define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i6 ; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64> ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[A:%.*]], [[BC1]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64> -; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], +; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[B:%.*]], [[BC2]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]] ; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]] diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index 62a63839704a44..1b6e816d2e624e 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll @@ -350,7 +350,7 @@ define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i6 ; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64> ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[A:%.*]], [[BC1]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64> -; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], +; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[B:%.*]], [[BC2]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]] ; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]] @@ -764,7 +764,7 @@ define <8 x i3> @bitcast_vec_cond_commute1(<3 x i1> noundef %cond, <8 x i3> %pc, ; CHECK-NEXT: [[C:%.*]] = mul <8 x i3> [[PC:%.*]], [[PC]] ; CHECK-NEXT: [[S:%.*]] = sext <3 x i1> [[COND:%.*]] to <3 x i8> ; CHECK-NEXT: [[T9:%.*]] = bitcast <3 x i8> [[S]] to <8 x i3> -; CHECK-NEXT: [[NOTT9:%.*]] = xor <8 x i3> [[T9]], +; CHECK-NEXT: [[NOTT9:%.*]] = xor <8 x i3> [[T9]], splat (i3 -1) ; CHECK-NEXT: [[T11:%.*]] = and <8 x i3> [[C]], [[NOTT9]] ; CHECK-NEXT: [[T12:%.*]] = and <8 x i3> [[D:%.*]], [[T9]] ; CHECK-NEXT: [[R:%.*]] = or disjoint <8 x i3> [[T11]], [[T12]] @@ -830,7 +830,7 @@ define <2 x i16> @bitcast_vec_cond_commute3(<4 x i8> %cond, <2 x i16> %pc, <2 x define <2 x i64> @bitcast_fp_vec_cond(<2 x double> noundef %s, <2 x i64> %c, <2 x i64> %d) { ; CHECK-LABEL: @bitcast_fp_vec_cond( ; CHECK-NEXT: [[T9:%.*]] = bitcast <2 x double> [[S:%.*]] to <2 x i64> -; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i64> [[T9]], +; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i64> [[T9]], splat (i64 -1) ; CHECK-NEXT: [[T11:%.*]] = and <2 x i64> [[C:%.*]], [[NOTT9]] ; CHECK-NEXT: [[T12:%.*]] = and <2 x i64> [[D:%.*]], [[T9]] ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i64> [[T11]], [[T12]] @@ -850,7 +850,7 @@ define <2 x i64> @bitcast_int_vec_cond(i1 noundef %b, <2 x i64> %c, <2 x i64> %d ; CHECK-LABEL: @bitcast_int_vec_cond( ; CHECK-NEXT: [[S:%.*]] = sext i1 [[B:%.*]] to i128 ; CHECK-NEXT: [[T9:%.*]] = bitcast i128 [[S]] to <2 x i64> -; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i64> [[T9]], +; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i64> [[T9]], splat (i64 -1) ; CHECK-NEXT: [[T11:%.*]] = and <2 x i64> [[C:%.*]], [[NOTT9]] ; CHECK-NEXT: [[T12:%.*]] = and <2 x i64> [[D:%.*]], [[T9]] ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i64> [[T11]], [[T12]] @@ -1043,7 +1043,7 @@ define i1 @not_d_bools_commute11(i1 %c, i1 %x, i1 %y) { define <2 x i1> @not_d_bools_vector(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @not_d_bools_vector( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[Y:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> [[X:%.*]], <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -1056,7 +1056,7 @@ define <2 x i1> @not_d_bools_vector(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { define <2 x i1> @not_d_bools_vector_poison(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @not_d_bools_vector_poison( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[Y:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> [[X:%.*]], <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -1181,8 +1181,8 @@ define i1 @logical_and_or_with_common_not_op_variant3(i1 %a, i1 %b) { ; A & (~A | B) --> A & B define <2 x i1> @logical_and_or_with_common_not_op_variant4(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @logical_and_or_with_common_not_op_variant4( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[NOT]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[NOT]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[A]], <2 x i1> [[B]], <2 x i1> zeroinitializer ; CHECK-NEXT: call void @use2(<2 x i1> [[A]]) ; CHECK-NEXT: call void @use2(<2 x i1> [[B]]) @@ -1244,7 +1244,7 @@ define i1 @logical_or_and_with_common_not_op_variant1(i1 %a, i1 %b) { ; A | (~A & B) --> A | B define <2 x i1> @logical_or_and_with_common_not_op_variant2(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @logical_or_and_with_common_not_op_variant2( -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %not = xor <2 x i1> %a, @@ -1272,9 +1272,9 @@ define i1 @logical_or_and_with_common_not_op_variant3(i1 %a, i1 %b) { ; A | (~A & B) --> A | B define <2 x i1> @logical_or_and_with_common_not_op_variant4(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @logical_or_and_with_common_not_op_variant4( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[NOT]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[A]], <2 x i1> , <2 x i1> [[B]] +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[A]], <2 x i1> splat (i1 true), <2 x i1> [[B]] ; CHECK-NEXT: call void @use2(<2 x i1> [[A]]) ; CHECK-NEXT: call void @use2(<2 x i1> [[B]]) ; CHECK-NEXT: call void @use2(<2 x i1> [[AND]]) diff --git a/llvm/test/Transforms/InstCombine/low-bit-splat.ll b/llvm/test/Transforms/InstCombine/low-bit-splat.ll index a86ab0359afd78..ac069fb138dbe0 100644 --- a/llvm/test/Transforms/InstCombine/low-bit-splat.ll +++ b/llvm/test/Transforms/InstCombine/low-bit-splat.ll @@ -31,7 +31,7 @@ define i16 @t1_otherbitwidth(i16 %x) { ; Basic positive vector tests define <2 x i8> @t2_vec(<2 x i8> %x) { ; CHECK-LABEL: @t2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> zeroinitializer, [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll index e1c441e9c0b446..31bf6231f7d6b1 100644 --- a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll @@ -72,7 +72,7 @@ define i1 @scalar_i32_lshr_and_negC_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_lshr_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_negC_eq( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_lshr_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_lshr_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_lshr_and_negC_eq_poison1( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -96,7 +96,7 @@ define <4 x i1> @vec_lshr_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_lshr_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_lshr_and_negC_eq_poison2( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -108,7 +108,7 @@ define <4 x i1> @vec_lshr_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_lshr_and_negC_eq_poison3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_lshr_and_negC_eq_poison3( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll index 0166680309ea8c..da3ed49c83d4ff 100644 --- a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll @@ -72,7 +72,7 @@ define i1 @scalar_i32_lshr_and_signbit_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_lshr_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison1( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -96,7 +96,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> % define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison2( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y @@ -108,7 +108,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> % define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison3( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll index a77ad3a7ea74ee..03188c0c590c43 100644 --- a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll +++ b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll @@ -64,7 +64,7 @@ define i16 @n3(i8 %x) { define <2 x i16> @t4_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @t4_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[C]] ; @@ -76,7 +76,7 @@ define <2 x i16> @t4_vec_splat(<2 x i8> %x) { define <2 x i16> @t5_vec_poison(<2 x i8> %x) { ; CHECK-LABEL: @t5_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[C]] ; @@ -143,7 +143,7 @@ define <2 x i16> @t9_extrause1_vec_poison(<2 x i8> %x) { ; CHECK-LABEL: @t9_extrause1_vec_poison( ; CHECK-NEXT: [[A:%.*]] = lshr <2 x i8> [[X:%.*]], ; CHECK-NEXT: call void @usevec8(<2 x i8> [[A]]) -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X]], splat (i8 4) ; CHECK-NEXT: [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[C]] ; @@ -188,7 +188,7 @@ define <2 x i16> @t11_extrause2_vec_poison(<2 x i8> %x) { define <2 x i10> @wide_source_shifted_signbit(<2 x i32> %x) { ; CHECK-LABEL: @wide_source_shifted_signbit( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 24) ; CHECK-NEXT: [[C:%.*]] = trunc nsw <2 x i32> [[TMP1]] to <2 x i10> ; CHECK-NEXT: ret <2 x i10> [[C]] ; diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll index 4360714c78caa6..00626015d2ed76 100644 --- a/llvm/test/Transforms/InstCombine/lshr.ll +++ b/llvm/test/Transforms/InstCombine/lshr.ll @@ -73,7 +73,7 @@ define <2 x i8> @lshr_cttz_zero_is_not_undef_splat_vec(<2 x i8> %x) { define <2 x i8> @lshr_ctpop_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_ctpop_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[SH]] ; @@ -153,8 +153,8 @@ define i8 @lshr_exact(i8 %x) { define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_exact_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[LSHR:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[LSHR:%.*]] = and <2 x i8> [[TMP1]], splat (i8 63) ; CHECK-NEXT: ret <2 x i8> [[LSHR]] ; %shl = shl <2 x i8> %x, @@ -165,7 +165,7 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) { define <2 x i8> @lshr_exact_splat_vec_nuw(<2 x i8> %x) { ; CHECK-LABEL: @lshr_exact_splat_vec_nuw( -; CHECK-NEXT: [[LSHR:%.*]] = add nuw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = add nuw <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[LSHR]] ; %shl = shl nuw <2 x i8> %x, @@ -190,9 +190,9 @@ define i8 @shl_add(i8 %x, i8 %y) { define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) { ; CHECK-LABEL: @shl_add_commute_vec( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]] -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[Y]], splat (i8 3) ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP2]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP2]], splat (i8 31) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %y = mul <2 x i8> %py, %py ; thwart complexity-based canonicalization @@ -289,7 +289,7 @@ define i16 @smear_sign_and_widen_should_not_change_type(i4 %x) { define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) { ; CHECK-LABEL: @smear_sign_and_widen_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i6> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i6> [[X:%.*]], splat (i6 2) ; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i6> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[HIBIT]] ; @@ -324,7 +324,7 @@ define i32 @fake_sext_but_should_not_change_type(i3 %x) { define <2 x i8> @fake_sext_splat(<2 x i3> %x) { ; CHECK-LABEL: @fake_sext_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i3> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i3> [[X:%.*]], splat (i3 2) ; CHECK-NEXT: [[SH:%.*]] = zext nneg <2 x i3> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[SH]] ; @@ -337,7 +337,7 @@ define <2 x i8> @fake_sext_splat(<2 x i3> %x) { define <2 x i32> @narrow_lshr_constant(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @narrow_lshr_constant( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[SH:%.*]] = zext nneg <2 x i8> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[SH]] ; @@ -359,7 +359,7 @@ define i32 @mul_splat_fold(i32 %x) { define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) { ; CHECK-LABEL: @mul_splat_fold_vec( -; CHECK-NEXT: [[M:%.*]] = mul nuw <3 x i14> [[X:%.*]], +; CHECK-NEXT: [[M:%.*]] = mul nuw <3 x i14> [[X:%.*]], splat (i14 129) ; CHECK-NEXT: call void @usevec(<3 x i14> [[M]]) ; CHECK-NEXT: ret <3 x i14> [[X]] ; @@ -787,7 +787,7 @@ define i32 @negative_and_odd(i32 %x) { define <2 x i7> @negative_and_odd_vec(<2 x i7> %x) { ; CHECK-LABEL: @negative_and_odd_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i7> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i7> [[X:%.*]], splat (i7 6) ; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[TMP1]], [[X]] ; CHECK-NEXT: ret <2 x i7> [[R]] ; @@ -851,7 +851,7 @@ define i12 @trunc_sandwich(i32 %x) { define <2 x i12> @trunc_sandwich_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @trunc_sandwich_splat_vec( -; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw <2 x i32> [[SUM_SHIFT]] to <2 x i12> ; CHECK-NEXT: ret <2 x i12> [[R1]] ; @@ -947,9 +947,9 @@ define i12 @trunc_sandwich_use1(i32 %x) { define <3 x i9> @trunc_sandwich_splat_vec_use1(<3 x i14> %x) { ; CHECK-LABEL: @trunc_sandwich_splat_vec_use1( -; CHECK-NEXT: [[SH:%.*]] = lshr <3 x i14> [[X:%.*]], +; CHECK-NEXT: [[SH:%.*]] = lshr <3 x i14> [[X:%.*]], splat (i14 6) ; CHECK-NEXT: call void @usevec(<3 x i14> [[SH]]) -; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <3 x i14> [[X]], +; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <3 x i14> [[X]], splat (i14 11) ; CHECK-NEXT: [[R1:%.*]] = trunc nuw nsw <3 x i14> [[SUM_SHIFT]] to <3 x i9> ; CHECK-NEXT: ret <3 x i9> [[R1]] ; @@ -1089,7 +1089,7 @@ define <3 x i14> @lshr_sext_i1_to_i14_splat_vec_use1(<3 x i1> %a) { ; CHECK-LABEL: @lshr_sext_i1_to_i14_splat_vec_use1( ; CHECK-NEXT: [[SEXT:%.*]] = sext <3 x i1> [[A:%.*]] to <3 x i14> ; CHECK-NEXT: call void @usevec(<3 x i14> [[SEXT]]) -; CHECK-NEXT: [[LSHR:%.*]] = select <3 x i1> [[A]], <3 x i14> , <3 x i14> zeroinitializer +; CHECK-NEXT: [[LSHR:%.*]] = select <3 x i1> [[A]], <3 x i14> splat (i14 1023), <3 x i14> zeroinitializer ; CHECK-NEXT: ret <3 x i14> [[LSHR]] ; %sext = sext <3 x i1> %a to <3 x i14> @@ -1269,7 +1269,7 @@ define <2 x i64> @narrow_bswap_splat_poison_elt(<2 x i16> %x) { define <2 x i64> @narrow_bswap_overshift(<2 x i32> %x) { ; CHECK-LABEL: @narrow_bswap_overshift( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: [[S:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[S]] ; @@ -1320,7 +1320,7 @@ define i8 @not_signbit(i8 %x) { define <2 x i6> @not_signbit_vec(<2 x i6> %x) { ; CHECK-LABEL: @not_signbit_vec( -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i6> [[X:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i6> [[X:%.*]], splat (i6 -1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[ISNOTNEG]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-add.ll b/llvm/test/Transforms/InstCombine/masked-merge-add.ll index 5ef53ad5150137..d78a9ee59c714b 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-add.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-add.ll @@ -34,7 +34,7 @@ define i32 @p(i32 %x, i32 %y, i32 noundef %m) { define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> noundef %m) { ; CHECK-LABEL: @p_splatvec( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], +; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], splat (i32 -1) ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], [[NEG]] ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] @@ -95,8 +95,8 @@ define i32 @p_constmask(i32 %x, i32 %y) { define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65280) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -151,8 +151,8 @@ define i32 @p_constmask2(i32 %x, i32 %y) { define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask2_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 61440) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll b/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll index 639478dfcc6fe8..399d6d4ac07d3f 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll @@ -31,7 +31,7 @@ define i32 @p(i32 %x, i32 %y, i32 %m) { define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %m) { ; CHECK-LABEL: @p_splatvec( -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M:%.*]], +; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M:%.*]], splat (i32 -1) ; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X:%.*]], [[NEG]] ; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], [[M]] ; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[OR]], [[OR1]] @@ -78,8 +78,8 @@ define i32 @p_constmask(i32 %x, i32 %y) { define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask_splatvec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 -65281) +; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], splat (i32 65280) ; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[OR]], [[OR1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-or.ll b/llvm/test/Transforms/InstCombine/masked-merge-or.ll index dd2ac6dfe51091..5e41279dd26b90 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-or.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-or.ll @@ -34,7 +34,7 @@ define i32 @p(i32 %x, i32 %y, i32 noundef %m) { define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> noundef %m) { ; CHECK-LABEL: @p_splatvec( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], +; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], splat (i32 -1) ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], [[NEG]] ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] @@ -95,8 +95,8 @@ define i32 @p_constmask(i32 %x, i32 %y) { define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65280) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -151,8 +151,8 @@ define i32 @p_constmask2(i32 %x, i32 %y) { define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask2_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 61440) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-xor.ll b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll index 7ed1f3fdfdab64..463f220fcbb6dd 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-xor.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll @@ -34,7 +34,7 @@ define i32 @p(i32 %x, i32 %y, i32 noundef %m) { define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> noundef %m) { ; CHECK-LABEL: @p_splatvec( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], +; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], splat (i32 -1) ; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], [[NEG]] ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] @@ -95,8 +95,8 @@ define i32 @p_constmask(i32 %x, i32 %y) { define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65280) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -151,8 +151,8 @@ define i32 @p_constmask2(i32 %x, i32 %y) { define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p_constmask2_splatvec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 61440) +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281) ; CHECK-NEXT: [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll index aee8f400ff43a1..155a7fd0590e4a 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll @@ -191,7 +191,7 @@ define <2 x double> @gather_zeromask(<2 x ptr> %ptrs, <2 x double> %passthru) { define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) { ; CHECK-LABEL: @gather_onemask( -; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> , <2 x double> poison) +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> splat (i1 true), <2 x double> poison) ; CHECK-NEXT: ret <2 x double> [[RES]] ; %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> , <2 x double> %passthru) diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 15ffc881b5731b..0f8b06e124211f 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -191,7 +191,7 @@ define <2 x double> @gather_zeromask(<2 x ptr> %ptrs, <2 x double> %passthru) { define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) { ; CHECK-LABEL: @gather_onemask( -; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> , <2 x double> poison) +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> splat (i1 true), <2 x double> poison) ; CHECK-NEXT: ret <2 x double> [[RES]] ; %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> , <2 x double> %passthru) @@ -363,7 +363,7 @@ define void @negative_scatter_v4i16_no_uniform_vals_no_uniform_ptrs_all_active_m ; CHECK-LABEL: @negative_scatter_v4i16_no_uniform_vals_no_uniform_ptrs_all_active_mask( ; CHECK-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x ptr> [[INPTR:%.*]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[SRC:%.*]], align 2 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[WIDE_LOAD]], <4 x ptr> [[BROADCAST]], i32 2, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[WIDE_LOAD]], <4 x ptr> [[BROADCAST]], i32 2, <4 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; %broadcast= shufflevector <4 x ptr> %inPtr, <4 x ptr> poison, <4 x i32> zeroinitializer @@ -412,7 +412,7 @@ define <2 x i64> @gather_v2i64_uniform_ptrs_all_active_mask(ptr %src) { define <2 x i64> @negative_gather_v2i64_non_uniform_ptrs_all_active_mask(<2 x ptr> %inVal, ptr %src ) { ; CHECK-LABEL: @negative_gather_v2i64_non_uniform_ptrs_all_active_mask( ; CHECK-NEXT: [[INSERT_VALUE:%.*]] = insertelement <2 x ptr> [[INVAL:%.*]], ptr [[SRC:%.*]], i64 1 -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[INSERT_VALUE]], i32 8, <2 x i1> , <2 x i64> poison) +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[INSERT_VALUE]], i32 8, <2 x i1> splat (i1 true), <2 x i64> poison) ; CHECK-NEXT: ret <2 x i64> [[RES]] ; %insert.value = insertelement <2 x ptr> %inVal, ptr %src, i32 1 diff --git a/llvm/test/Transforms/InstCombine/max-of-nots.ll b/llvm/test/Transforms/InstCombine/max-of-nots.ll index acce89b38be83d..b57f84b41033fd 100644 --- a/llvm/test/Transforms/InstCombine/max-of-nots.ll +++ b/llvm/test/Transforms/InstCombine/max-of-nots.ll @@ -4,7 +4,7 @@ define <2 x i32> @umin_of_nots(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @umin_of_nots( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) -; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[MIN]] ; %notx = xor <2 x i32> %x, @@ -17,7 +17,7 @@ define <2 x i32> @umin_of_nots(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @smin_of_nots(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @smin_of_nots( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) -; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[MIN]] ; %notx = xor <2 x i32> %x, @@ -242,7 +242,7 @@ define <2 x i32> @max_of_nots_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @max_of_nots_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[X:%.*]]) -; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i32> [[TMP2]], +; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i32> [[TMP2]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[SMAX96]] ; %c0 = icmp sgt <2 x i32> %y, zeroinitializer @@ -258,7 +258,7 @@ define <2 x i37> @max_of_nots_weird_type_vec(<2 x i37> %x, <2 x i37> %y) { ; CHECK-LABEL: @max_of_nots_weird_type_vec( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i37> @llvm.smax.v2i37(<2 x i37> [[Y:%.*]], <2 x i37> zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i37> @llvm.smin.v2i37(<2 x i37> [[TMP1]], <2 x i37> [[X:%.*]]) -; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i37> [[TMP2]], +; CHECK-NEXT: [[SMAX96:%.*]] = xor <2 x i37> [[TMP2]], splat (i37 -1) ; CHECK-NEXT: ret <2 x i37> [[SMAX96]] ; %c0 = icmp sgt <2 x i37> %y, zeroinitializer @@ -324,7 +324,7 @@ define i32 @min_of_max_swap(i32 %a) { define <2 x i32> @max_of_min_vec(<2 x i32> %a) { ; CHECK-LABEL: @max_of_min_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 -1) ; %not_a = xor <2 x i32> %a, %c0 = icmp sgt <2 x i32> %a, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/maximum.ll b/llvm/test/Transforms/InstCombine/maximum.ll index 7455d3b7b924cc..c79449472235b5 100644 --- a/llvm/test/Transforms/InstCombine/maximum.ll +++ b/llvm/test/Transforms/InstCombine/maximum.ll @@ -217,7 +217,7 @@ define float @maximum_f32_1_maximum_p0_val(float %x) { define <2 x float> @maximum_f32_1_maximum_val_p0_val_v2f32(<2 x float> %x) { ; CHECK-LABEL: @maximum_f32_1_maximum_val_p0_val_v2f32( -; CHECK-NEXT: [[Z:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> [[X:%.*]], <2 x float> ) +; CHECK-NEXT: [[Z:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> [[X:%.*]], <2 x float> splat (float 1.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[Z]] ; %y = call <2 x float> @llvm.maximum.v2f32(<2 x float> %x, <2 x float> zeroinitializer) diff --git a/llvm/test/Transforms/InstCombine/maxnum.ll b/llvm/test/Transforms/InstCombine/maxnum.ll index f26a5300febd85..67fb7a90b3d49e 100644 --- a/llvm/test/Transforms/InstCombine/maxnum.ll +++ b/llvm/test/Transforms/InstCombine/maxnum.ll @@ -217,7 +217,7 @@ define float @maxnum_f32_1_maxnum_p0_val(float %x) { define <2 x float> @maxnum_f32_1_maxnum_val_p0_val_v2f32(<2 x float> %x) { ; CHECK-LABEL: @maxnum_f32_1_maxnum_val_p0_val_v2f32( -; CHECK-NEXT: [[Z:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[X:%.*]], <2 x float> ) +; CHECK-NEXT: [[Z:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[X:%.*]], <2 x float> splat (float 1.000000e+00)) ; CHECK-NEXT: ret <2 x float> [[Z]] ; %y = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> zeroinitializer) diff --git a/llvm/test/Transforms/InstCombine/merge-icmp.ll b/llvm/test/Transforms/InstCombine/merge-icmp.ll index 4e8ef07dba04c8..4f658273ce0982 100644 --- a/llvm/test/Transforms/InstCombine/merge-icmp.ll +++ b/llvm/test/Transforms/InstCombine/merge-icmp.ll @@ -38,7 +38,7 @@ define i1 @and_test1_logical(ptr %x) { define <2 x i1> @and_test1_vector(ptr %x) { ; CHECK-LABEL: @and_test1_vector( ; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i16>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], +; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], splat (i16 17791) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %load = load <2 x i16>, ptr %x, align 4 @@ -83,7 +83,7 @@ define i1 @and_test2_logical(ptr %x) { define <2 x i1> @and_test2_vector(ptr %x) { ; CHECK-LABEL: @and_test2_vector( ; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i16>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], +; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], splat (i16 32581) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %load = load <2 x i16>, ptr %x, align 4 @@ -123,7 +123,7 @@ define i1 @or_basic_commuted(i16 %load) { define <2 x i1> @or_vector(<2 x i16> %load) { ; CHECK-LABEL: @or_vector( -; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i16> [[LOAD:%.*]], +; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i16> [[LOAD:%.*]], splat (i16 17791) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %trunc = trunc <2 x i16> %load to <2 x i8> diff --git a/llvm/test/Transforms/InstCombine/min-positive.ll b/llvm/test/Transforms/InstCombine/min-positive.ll index d2c2e9018792bd..db73974b7bf6ad 100644 --- a/llvm/test/Transforms/InstCombine/min-positive.ll +++ b/llvm/test/Transforms/InstCombine/min-positive.ll @@ -97,7 +97,7 @@ define i1 @maybe_not_positive(i32 %other) { define <2 x i1> @maybe_not_positive_vec(<2 x i32> %x, <2 x i32> %other) { ; CHECK-LABEL: @maybe_not_positive_vec( -; CHECK-NEXT: [[NOTNEG:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[NOTNEG:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: [[SEL:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[NOTNEG]], <2 x i32> [[OTHER:%.*]]) ; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[SEL]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TEST]] diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll index ec1c7aff409661..ccdf4400b16b54 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fold.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll @@ -226,7 +226,7 @@ define i32 @test68(i32 %x) { define <2 x i32> @test68vec(<2 x i32> %x) { ; CHECK-LABEL: @test68vec( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 11)) ; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp slt <2 x i32> , %x @@ -290,7 +290,7 @@ define i32 @test72(i32 %x) { define <2 x i32> @test72vec(<2 x i32> %x) { ; CHECK-LABEL: @test72vec( -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 11)) ; CHECK-NEXT: ret <2 x i32> [[RETVAL]] ; %cmp = icmp sgt <2 x i32> %x, @@ -711,7 +711,7 @@ define <2 x i8> @min_through_cast_vec1(<2 x i32> %x) { define <2 x i8> @min_through_cast_vec2(<2 x i32> %x) { ; CHECK-LABEL: @min_through_cast_vec2( -; CHECK-NEXT: [[RES1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[RES1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 511)) ; CHECK-NEXT: [[RES:%.*]] = trunc <2 x i32> [[RES1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[RES]] ; @@ -963,8 +963,8 @@ define i32 @add_umin_extra_use(i32 %x, ptr %p) { define <2 x i16> @add_umin_vec(<2 x i16> %x) { ; CHECK-LABEL: @add_umin_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> ) -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> splat (i16 225)) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i16> [[TMP1]], splat (i16 15) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %a = add nuw <2 x i16> %x, @@ -1075,8 +1075,8 @@ define i32 @add_umax_extra_use(i32 %x, ptr %p) { define <2 x i33> @add_umax_vec(<2 x i33> %x) { ; CHECK-LABEL: @add_umax_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.umax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> ) -; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i33> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.umax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> splat (i33 235)) +; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i33> [[TMP1]], splat (i33 5) ; CHECK-NEXT: ret <2 x i33> [[R]] ; %a = add nuw <2 x i33> %x, @@ -1210,8 +1210,8 @@ define i32 @add_smin_extra_use(i32 %x, ptr %p) { define <2 x i16> @add_smin_vec(<2 x i16> %x) { ; CHECK-LABEL: @add_smin_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> ) -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> splat (i16 225)) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i16> [[TMP1]], splat (i16 15) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %a = add nsw <2 x i16> %x, @@ -1318,8 +1318,8 @@ define i32 @add_smax_extra_use(i32 %x, ptr %p) { define <2 x i33> @add_smax_vec(<2 x i33> %x) { ; CHECK-LABEL: @add_smax_vec( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.smax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> ) -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i33> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i33> @llvm.smax.v2i33(<2 x i33> [[X:%.*]], <2 x i33> splat (i33 235)) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i33> [[TMP1]], splat (i33 5) ; CHECK-NEXT: ret <2 x i33> [[R]] ; %a = add nsw <2 x i33> %x, @@ -1470,7 +1470,7 @@ define i32 @test_smin_umin2(i32 %x) { define <2 x i32> @test_smin_umin_vec(<2 x i32> %x) { ; CHECK-LABEL: @test_smin_umin_vec( -; CHECK-NEXT: [[UMIN:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[UMIN:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 10)) ; CHECK-NEXT: ret <2 x i32> [[UMIN]] ; %smin = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %x, <2 x i32> ) diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index a76f0f84ba3401..0b7127f82b6125 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -398,7 +398,7 @@ define i8 @smax_of_nots(i8 %x, i8 %y) { define <3 x i8> @smin_of_nots(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @smin_of_nots( ; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) -; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], +; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <3 x i8> [[M]] ; %notx = xor <3 x i8> %x, @@ -478,7 +478,7 @@ define i8 @smax_of_not_and_const(i8 %x) { define <3 x i8> @smin_of_not_and_const(<3 x i8> %x) { ; CHECK-LABEL: @smin_of_not_and_const( ; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> ) -; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], +; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], splat (i8 -1) ; CHECK-NEXT: ret <3 x i8> [[M]] ; %notx = xor <3 x i8> %x, @@ -774,8 +774,8 @@ define i8 @clamp_two_vals_smax_smin(i8 %x) { define <3 x i8> @clamp_two_vals_smin_smax(<3 x i8> %x) { ; CHECK-LABEL: @clamp_two_vals_smin_smax( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i8> , <3 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], splat (i8 41) +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i8> splat (i8 42), <3 x i8> splat (i8 41) ; CHECK-NEXT: ret <3 x i8> [[R]] ; %m = call <3 x i8> @llvm.smin.v3i8(<3 x i8> %x, <3 x i8> ) @@ -1933,8 +1933,8 @@ define i8 @smax_offset_uses(i8 %x) { define <3 x i8> @smin_offset(<3 x i8> %x) { ; CHECK-LABEL: @smin_offset( -; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> ) -; CHECK-NEXT: [[M:%.*]] = or disjoint <3 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> splat (i8 -127)) +; CHECK-NEXT: [[M:%.*]] = or disjoint <3 x i8> [[TMP1]], splat (i8 124) ; CHECK-NEXT: ret <3 x i8> [[M]] ; %a = add nsw nuw <3 x i8> %x, @@ -1996,8 +1996,8 @@ define i8 @smin_offset_uses(i8 %x) { define <3 x i8> @umax_offset(<3 x i8> %x) { ; CHECK-LABEL: @umax_offset( -; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> ) -; CHECK-NEXT: [[M:%.*]] = add nuw <3 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> splat (i8 3)) +; CHECK-NEXT: [[M:%.*]] = add nuw <3 x i8> [[TMP1]], splat (i8 127) ; CHECK-NEXT: ret <3 x i8> [[M]] ; %a = add nsw nuw <3 x i8> %x, @@ -2509,8 +2509,8 @@ entry: define <3 x i8> @fold_umax_with_knownbits_info_poison_in_splat(<3 x i8> %a, <3 x i8> %b) { ; CHECK-LABEL: @fold_umax_with_knownbits_info_poison_in_splat( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A1:%.*]] = or <3 x i8> [[A:%.*]], -; CHECK-NEXT: [[A2:%.*]] = shl <3 x i8> [[B:%.*]], +; CHECK-NEXT: [[A1:%.*]] = or <3 x i8> [[A:%.*]], splat (i8 1) +; CHECK-NEXT: [[A2:%.*]] = shl <3 x i8> [[B:%.*]], splat (i8 1) ; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i8> [[A1]], [[A2]] ; CHECK-NEXT: ret <3 x i8> [[SUB]] ; @@ -2538,7 +2538,7 @@ entry: define <3 x i8> @fold_umin_with_knownbits_info_poison_in_splat(<3 x i8> %a, <3 x i8> %b) { ; CHECK-LABEL: @fold_umin_with_knownbits_info_poison_in_splat( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> splat (i8 3) ; entry: %a1 = or <3 x i8> %a, diff --git a/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll b/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll index 8b896632b8adcc..e688391f72e631 100644 --- a/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll +++ b/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll @@ -15,7 +15,7 @@ declare void @barrier() define <2 x i8> @umax_xor_Cpow2(<2 x i8> %x) { ; CHECK-LABEL: @umax_xor_Cpow2( -; CHECK-NEXT: [[R:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x_xor = xor <2 x i8> %x, @@ -45,7 +45,7 @@ define i8 @smax_xor_Cpow2_pos(i8 %x) { define <2 x i8> @smin_xor_Cpow2_pos(<2 x i8> %x) { ; CHECK-LABEL: @smin_xor_Cpow2_pos( -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -17) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x_xor = xor <2 x i8> %x, @@ -55,7 +55,7 @@ define <2 x i8> @smin_xor_Cpow2_pos(<2 x i8> %x) { define <2 x i8> @smax_xor_Cpow2_neg(<2 x i8> %x) { ; CHECK-LABEL: @smax_xor_Cpow2_neg( -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x_xor = xor <2 x i8> %x, @@ -91,7 +91,7 @@ define <2 x i8> @umin_xor_pow2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @umin_xor_pow2( ; CHECK-NEXT: [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]] ; CHECK-NEXT: [[YP2:%.*]] = and <2 x i8> [[Y]], [[NY]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[YP2]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[YP2]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/modulo.ll b/llvm/test/Transforms/InstCombine/modulo.ll index 2988c524faedca..7a9584d69d3bf1 100644 --- a/llvm/test/Transforms/InstCombine/modulo.ll +++ b/llvm/test/Transforms/InstCombine/modulo.ll @@ -16,7 +16,7 @@ define i32 @modulo2(i32 %x) { define <2 x i32> @modulo2_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo2_vec( -; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; %rem.i = srem <2 x i32> %x, @@ -43,9 +43,9 @@ define i32 @modulo3(i32 %x) { define <2 x i32> @modulo3_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo3_vec( -; CHECK-NEXT: [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i32> [[REM_I]], zeroinitializer -; CHECK-NEXT: [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> splat (i32 3), <2 x i32> zeroinitializer ; CHECK-NEXT: [[RET_I:%.*]] = add nsw <2 x i32> [[ADD_I]], [[REM_I]] ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; @@ -70,7 +70,7 @@ define i32 @modulo4(i32 %x) { define <2 x i32> @modulo4_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo4_vec( -; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; %rem.i = srem <2 x i32> %x, @@ -97,9 +97,9 @@ define i32 @modulo7(i32 %x) { define <2 x i32> @modulo7_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo7_vec( -; CHECK-NEXT: [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], splat (i32 7) ; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i32> [[REM_I]], zeroinitializer -; CHECK-NEXT: [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> splat (i32 7), <2 x i32> zeroinitializer ; CHECK-NEXT: [[RET_I:%.*]] = add nsw <2 x i32> [[ADD_I]], [[REM_I]] ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; @@ -124,7 +124,7 @@ define i32 @modulo32(i32 %x) { define <2 x i32> @modulo32_vec(<2 x i32> %x) { ; CHECK-LABEL: @modulo32_vec( -; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[RET_I]] ; %rem.i = srem <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll index 94f41236c6f867..997758af62a543 100644 --- a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll @@ -14,7 +14,7 @@ define i32 @pow2_multiplier(i32 %A) { define <2 x i32> @pow2_multiplier_vec(<2 x i32> %A) { ; CHECK-LABEL: @pow2_multiplier_vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %B = mul <2 x i32> %A, @@ -492,7 +492,7 @@ define i55 @neg_mul_constant_apint(i55 %A) { define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) { ; CHECK-LABEL: @neg_mul_constant_vec( -; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]], splat (i8 -5) ; CHECK-NEXT: ret <3 x i8> [[B]] ; %A = sub <3 x i8> zeroinitializer, %a @@ -502,7 +502,7 @@ define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) { define <3 x i4> @neg_mul_constant_vec_weird(<3 x i4> %a) { ; CHECK-LABEL: @neg_mul_constant_vec_weird( -; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]], splat (i4 -5) ; CHECK-NEXT: ret <3 x i4> [[B]] ; %A = sub <3 x i4> zeroinitializer, %a @@ -593,7 +593,7 @@ define i32 @test32(i32 %X) { define <2 x i32> @test32vec(<2 x i32> %X) { ; CHECK-LABEL: @test32vec( -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %mul = mul nsw <2 x i32> %X, @@ -611,7 +611,7 @@ define i32 @test33(i32 %X) { define <2 x i32> @test33vec(<2 x i32> %X) { ; CHECK-LABEL: @test33vec( -; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %mul = mul nsw <2 x i32> %X, @@ -767,7 +767,7 @@ define i32 @negate_if_false_nuw(i32 %x, i1 %cond) { define <2 x i8> @negate_if_true_commute(<2 x i8> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_commute( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i8> zeroinitializer, [[X]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[COND:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[R]] @@ -810,8 +810,8 @@ define i32 @negate_if_true_extra_use(i32 %x, i1 %cond) { define <2 x i8> @negate_if_true_wrong_constant(<2 x i8> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_wrong_constant( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> , <2 x i8> +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> , <2 x i8> splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[X]], [[SEL]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -956,7 +956,7 @@ define i32 @mulsub1(i32 %a0, i32 %a1) { define <2 x i32> @mulsub1_vec(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @mulsub1_vec( ; CHECK-NEXT: [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> %a1, %a0 @@ -999,8 +999,8 @@ define i32 @mulsub2(i32 %a0) { define <2 x i32> @mulsub2_vec(<2 x i32> %a0) { ; CHECK-LABEL: @mulsub2_vec( -; CHECK-NEXT: [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], +; CHECK-NEXT: [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], splat (i32 -64) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> , %a0 @@ -1043,8 +1043,8 @@ define i32 @muladd2(i32 %a0) { define <2 x i32> @muladd2_vec(<2 x i32> %a0) { ; CHECK-LABEL: @muladd2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> splat (i32 -64), [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %add = add <2 x i32> %a0, diff --git a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll index fd8ad88764f592..48d44c05f39dff 100644 --- a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll +++ b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll @@ -103,8 +103,8 @@ define i8 @one_demanded_bit(i8 %x) { define <2 x i8> @one_demanded_bit_splat(<2 x i8> %x) { ; CHECK-LABEL: @one_demanded_bit_splat( -; CHECK-NEXT: [[M:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[M]], +; CHECK-NEXT: [[M:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5) +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[M]], splat (i8 32) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %m = mul <2 x i8> %x, ; 0b1010_0000 @@ -134,7 +134,7 @@ define i33 @squared_one_demanded_low_bit(i33 %x) { define <2 x i8> @squared_one_demanded_low_bit_splat(<2 x i8> %x) { ; CHECK-LABEL: @squared_one_demanded_low_bit_splat( -; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 -2) ; CHECK-NEXT: ret <2 x i8> [[AND]] ; %mul = mul <2 x i8> %x, %x @@ -154,8 +154,8 @@ define i33 @squared_demanded_2_low_bits(i33 %x) { define <2 x i8> @squared_demanded_2_low_bits_splat(<2 x i8> %x) { ; CHECK-LABEL: @squared_demanded_2_low_bits_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[AND:%.*]] = or disjoint <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[AND:%.*]] = or disjoint <2 x i8> [[TMP1]], splat (i8 -4) ; CHECK-NEXT: ret <2 x i8> [[AND]] ; %mul = mul <2 x i8> %x, %x @@ -240,7 +240,7 @@ define i64 @scalar_mul_bit_x0_yC(i64 %x, i64 %y, i64 %c) { ; Vector tests define <2 x i64> @vector_mul_bit_x0_y0(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @vector_mul_bit_x0_y0( -; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[Y:%.*]], splat (i64 1) ; CHECK-NEXT: [[MUL:%.*]] = and <2 x i64> [[X:%.*]], [[AND2]] ; CHECK-NEXT: ret <2 x i64> [[MUL]] ; diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 8c528e340bc6ce..e38ab1b9622b2c 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -14,7 +14,7 @@ define i32 @pow2_multiplier(i32 %A) { define <2 x i32> @pow2_multiplier_vec(<2 x i32> %A) { ; CHECK-LABEL: @pow2_multiplier_vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %B = mul <2 x i32> %A, @@ -161,7 +161,7 @@ define i5 @shl1_increment(i5 %x, i5 %y) { define <3 x i5> @shl1_nuw_increment_commute(<3 x i5> %x, <3 x i5> noundef %p) { ; CHECK-LABEL: @shl1_nuw_increment_commute( -; CHECK-NEXT: [[Y:%.*]] = ashr <3 x i5> [[P:%.*]], +; CHECK-NEXT: [[Y:%.*]] = ashr <3 x i5> [[P:%.*]], splat (i5 1) ; CHECK-NEXT: [[MULSHL:%.*]] = shl nuw <3 x i5> [[Y]], [[X:%.*]] ; CHECK-NEXT: [[M1:%.*]] = add nuw <3 x i5> [[MULSHL]], [[Y]] ; CHECK-NEXT: ret <3 x i5> [[M1]] @@ -989,7 +989,7 @@ define i32 @PR57278_or_disjoint_nsw(i32 %a) { define <2 x i32> @PR57278_shl_vec(<2 x i32> %v1) { ; CHECK-LABEL: @PR57278_shl_vec( ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <2 x i32> [[V1:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = add nuw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[MUL:%.*]] = add nuw <2 x i32> [[TMP1]], splat (i32 9) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %shl = shl nuw <2 x i32> %v1, @@ -1105,7 +1105,7 @@ define i55 @neg_mul_constant_apint(i55 %A) { define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) { ; CHECK-LABEL: @neg_mul_constant_vec( -; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]], splat (i8 -5) ; CHECK-NEXT: ret <3 x i8> [[B]] ; %A = sub <3 x i8> zeroinitializer, %a @@ -1115,7 +1115,7 @@ define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) { define <3 x i4> @neg_mul_constant_vec_weird(<3 x i4> %a) { ; CHECK-LABEL: @neg_mul_constant_vec_weird( -; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]], splat (i4 -5) ; CHECK-NEXT: ret <3 x i4> [[B]] ; %A = sub <3 x i4> zeroinitializer, %a @@ -1175,7 +1175,7 @@ define i32 @test32(i32 %X) { define <2 x i32> @test32vec(<2 x i32> %X) { ; CHECK-LABEL: @test32vec( -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %mul = mul nsw <2 x i32> %X, @@ -1193,7 +1193,7 @@ define i32 @test33(i32 %X) { define <2 x i32> @test33vec(<2 x i32> %X) { ; CHECK-LABEL: @test33vec( -; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %mul = mul nsw <2 x i32> %X, @@ -1479,7 +1479,7 @@ define i32 @negate_if_false(i32 %x, i1 %cond) { define <2 x i8> @negate_if_true_commute(<2 x i8> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_commute( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i8> zeroinitializer, [[X]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[COND:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[R]] @@ -1522,8 +1522,8 @@ define i32 @negate_if_true_extra_use(i32 %x, i1 %cond) { define <2 x i8> @negate_if_true_wrong_constant(<2 x i8> %px, i1 %cond) { ; CHECK-LABEL: @negate_if_true_wrong_constant( -; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> , <2 x i8> +; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> , <2 x i8> splat (i8 1) ; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[X]], [[SEL]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1735,7 +1735,7 @@ define i32 @mulsub1(i32 %a0, i32 %a1) { define <2 x i32> @mulsub1_vec(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @mulsub1_vec( ; CHECK-NEXT: [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]] -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> %a1, %a0 @@ -1778,8 +1778,8 @@ define i32 @mulsub2(i32 %a0) { define <2 x i32> @mulsub2_vec(<2 x i32> %a0) { ; CHECK-LABEL: @mulsub2_vec( -; CHECK-NEXT: [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], +; CHECK-NEXT: [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], splat (i32 -64) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> , %a0 @@ -1846,8 +1846,8 @@ define i32 @muladd2(i32 %a0) { define <2 x i32> @muladd2_vec(<2 x i32> %a0) { ; CHECK-LABEL: @muladd2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2) +; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> splat (i32 -64), [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %add = add <2 x i32> %a0, @@ -1976,7 +1976,7 @@ define <2 x i14> @zext_negpow2_vec(<2 x i5> %x) { ; CHECK-LABEL: @zext_negpow2_vec( ; CHECK-NEXT: [[X_NEG:%.*]] = sub <2 x i5> zeroinitializer, [[X:%.*]] ; CHECK-NEXT: [[X_NEG_Z:%.*]] = zext <2 x i5> [[X_NEG]] to <2 x i14> -; CHECK-NEXT: [[R:%.*]] = shl <2 x i14> [[X_NEG_Z]], +; CHECK-NEXT: [[R:%.*]] = shl <2 x i14> [[X_NEG_Z]], splat (i14 11) ; CHECK-NEXT: ret <2 x i14> [[R]] ; %zx = zext <2 x i5> %x to <2 x i14> @@ -2015,7 +2015,7 @@ define <2 x i16> @sext_negpow2_vec(<2 x i8> %x) { ; CHECK-LABEL: @sext_negpow2_vec( ; CHECK-NEXT: [[X_NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]] ; CHECK-NEXT: [[X_NEG_Z:%.*]] = zext <2 x i8> [[X_NEG]] to <2 x i16> -; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i16> [[X_NEG_Z]], +; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i16> [[X_NEG_Z]], splat (i16 8) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %sx = sext <2 x i8> %x to <2 x i16> diff --git a/llvm/test/Transforms/InstCombine/mul_fold.ll b/llvm/test/Transforms/InstCombine/mul_fold.ll index e4a21db8a6ece7..ced30b6efa2046 100644 --- a/llvm/test/Transforms/InstCombine/mul_fold.ll +++ b/llvm/test/Transforms/InstCombine/mul_fold.ll @@ -542,8 +542,8 @@ define <2 x i8> @mul_v2i8_low(<2 x i8> %in0, <2 x i8> %in1) { define <2 x i8> @mul_v2i8_low_one_extra_user(<2 x i8> %in0, <2 x i8> %in1) { ; CHECK-LABEL: @mul_v2i8_low_one_extra_user( -; CHECK-NEXT: [[IN0HI:%.*]] = lshr <2 x i8> [[IN0:%.*]], -; CHECK-NEXT: [[IN1LO:%.*]] = and <2 x i8> [[IN1:%.*]], +; CHECK-NEXT: [[IN0HI:%.*]] = lshr <2 x i8> [[IN0:%.*]], splat (i8 4) +; CHECK-NEXT: [[IN1LO:%.*]] = and <2 x i8> [[IN1:%.*]], splat (i8 15) ; CHECK-NEXT: [[M01:%.*]] = mul nuw <2 x i8> [[IN1LO]], [[IN0HI]] ; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[M01]]) ; CHECK-NEXT: [[RETLO:%.*]] = mul <2 x i8> [[IN0]], [[IN1]] diff --git a/llvm/test/Transforms/InstCombine/narrow-math.ll b/llvm/test/Transforms/InstCombine/narrow-math.ll index 0fcded7b0220a1..14527e5b760d7e 100644 --- a/llvm/test/Transforms/InstCombine/narrow-math.ll +++ b/llvm/test/Transforms/InstCombine/narrow-math.ll @@ -214,8 +214,8 @@ define i64 @sext_add_constant_extra_use(i32 %V) { define <2 x i64> @test5_splat(<2 x i32> %V) { ; CHECK-LABEL: @test5_splat( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], splat (i32 1073741823) ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; @@ -227,7 +227,7 @@ define <2 x i64> @test5_splat(<2 x i32> %V) { define <2 x i64> @test5_vec(<2 x i32> %V) { ; CHECK-LABEL: @test5_vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] @@ -253,8 +253,8 @@ define i64 @test6(i32 %V) { define <2 x i64> @test6_splat(<2 x i32> %V) { ; CHECK-LABEL: @test6_splat( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], splat (i32 -1073741824) ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; @@ -266,7 +266,7 @@ define <2 x i64> @test6_splat(<2 x i32> %V) { define <2 x i64> @test6_vec(<2 x i32> %V) { ; CHECK-LABEL: @test6_vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] @@ -279,7 +279,7 @@ define <2 x i64> @test6_vec(<2 x i32> %V) { define <2 x i64> @test6_vec2(<2 x i32> %V) { ; CHECK-LABEL: @test6_vec2( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] @@ -305,8 +305,8 @@ define i64 @test7(i32 %V) { define <2 x i64> @test7_splat(<2 x i32> %V) { ; CHECK-LABEL: @test7_splat( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], splat (i32 2147483647) ; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] ; @@ -318,7 +318,7 @@ define <2 x i64> @test7_splat(<2 x i32> %V) { define <2 x i64> @test7_vec(<2 x i32> %V) { ; CHECK-LABEL: @test7_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], ; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ADD]] @@ -344,8 +344,8 @@ define i64 @test8(i32 %V) { define <2 x i64> @test8_splat(<2 x i32> %V) { ; CHECK-LABEL: @test8_splat( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) +; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], splat (i32 32767) ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] ; @@ -357,7 +357,7 @@ define <2 x i64> @test8_splat(<2 x i32> %V) { define <2 x i64> @test8_vec(<2 x i32> %V) { ; CHECK-LABEL: @test8_vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) ; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] @@ -370,7 +370,7 @@ define <2 x i64> @test8_vec(<2 x i32> %V) { define <2 x i64> @test8_vec2(<2 x i32> %V) { ; CHECK-LABEL: @test8_vec2( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) ; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] @@ -396,8 +396,8 @@ define i64 @test9(i32 %V) { define <2 x i64> @test9_splat(<2 x i32> %V) { ; CHECK-LABEL: @test9_splat( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) +; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], splat (i32 -32767) ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] ; @@ -409,7 +409,7 @@ define <2 x i64> @test9_splat(<2 x i32> %V) { define <2 x i64> @test9_vec(<2 x i32> %V) { ; CHECK-LABEL: @test9_vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16) ; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], ; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] @@ -435,8 +435,8 @@ define i64 @test10(i32 %V) { define <2 x i64> @test10_splat(<2 x i32> %V) { ; CHECK-LABEL: @test10_splat( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 16) +; CHECK-NEXT: [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], splat (i32 65535) ; CHECK-NEXT: [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] ; @@ -448,7 +448,7 @@ define <2 x i64> @test10_splat(<2 x i32> %V) { define <2 x i64> @test10_vec(<2 x i32> %V) { ; CHECK-LABEL: @test10_vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 16) ; CHECK-NEXT: [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], ; CHECK-NEXT: [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[MUL]] @@ -538,8 +538,8 @@ define i64 @test15(i32 %V) { define <2 x i64> @test15vec(<2 x i32> %V) { ; CHECK-LABEL: @test15vec( -; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = sub nsw <2 x i32> , [[ASHR]] +; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = sub nsw <2 x i32> splat (i32 8), [[ASHR]] ; CHECK-NEXT: [[SUB:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SUB]] ; @@ -564,8 +564,8 @@ define i64 @test16(i32 %V) { define <2 x i64> @test16vec(<2 x i32> %V) { ; CHECK-LABEL: @test16vec( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], -; CHECK-NEXT: [[NARROW:%.*]] = sub nuw <2 x i32> , [[LSHR]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 1) +; CHECK-NEXT: [[NARROW:%.*]] = sub nuw <2 x i32> splat (i32 -2), [[LSHR]] ; CHECK-NEXT: [[SUB:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SUB]] ; diff --git a/llvm/test/Transforms/InstCombine/narrow.ll b/llvm/test/Transforms/InstCombine/narrow.ll index 40229f8511f76d..5fad7f07090b9f 100644 --- a/llvm/test/Transforms/InstCombine/narrow.ll +++ b/llvm/test/Transforms/InstCombine/narrow.ll @@ -37,7 +37,7 @@ define i32 @shrink_xor(i64 %a) { define <2 x i32> @shrink_xor_vec(<2 x i64> %a) { ; CHECK-LABEL: @shrink_xor_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TRUNC:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %xor = xor <2 x i64> %a, diff --git a/llvm/test/Transforms/InstCombine/negated-bitmask.ll b/llvm/test/Transforms/InstCombine/negated-bitmask.ll index 91886781863476..95b03626df534a 100644 --- a/llvm/test/Transforms/InstCombine/negated-bitmask.ll +++ b/llvm/test/Transforms/InstCombine/negated-bitmask.ll @@ -31,8 +31,8 @@ define i8 @sub_mask1_lshr(i8 %a0) { define <4 x i32> @neg_mask1_lshr_vector_uniform(<4 x i32> %a0) { ; CHECK-LABEL: @neg_mask1_lshr_vector_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], splat (i32 28) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %shift = lshr <4 x i32> %a0, @@ -44,7 +44,7 @@ define <4 x i32> @neg_mask1_lshr_vector_uniform(<4 x i32> %a0) { define <4 x i32> @neg_mask1_lshr_vector_nonuniform(<4 x i32> %a0) { ; CHECK-LABEL: @neg_mask1_lshr_vector_nonuniform( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %shift = lshr <4 x i32> %a0, @@ -56,7 +56,7 @@ define <4 x i32> @neg_mask1_lshr_vector_nonuniform(<4 x i32> %a0) { define <4 x i32> @sub_mask1_lshr_vector_nonuniform(<4 x i32> %a0) { ; CHECK-LABEL: @sub_mask1_lshr_vector_nonuniform( ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: [[NEG:%.*]] = add nsw <4 x i32> [[TMP2]], ; CHECK-NEXT: ret <4 x i32> [[NEG]] ; @@ -142,7 +142,7 @@ define i8 @neg_mask2_lshr_outofbounds(i8 %a0) { define <2 x i32> @neg_mask1_lshr_vector_var(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @neg_mask1_lshr_vector_var( ; CHECK-NEXT: [[SHIFT:%.*]] = lshr <2 x i32> [[A0:%.*]], [[A1:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHIFT]], +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHIFT]], splat (i32 1) ; CHECK-NEXT: [[NEG:%.*]] = sub nsw <2 x i32> zeroinitializer, [[MASK]] ; CHECK-NEXT: ret <2 x i32> [[NEG]] ; @@ -171,8 +171,8 @@ define i8 @neg_mask1_lshr_extrause_mask(i8 %a0) { ; Extra Use - shift define <2 x i32> @neg_mask1_lshr_extrause_lshr(<2 x i32> %a0) { ; CHECK-LABEL: @neg_mask1_lshr_extrause_lshr( -; CHECK-NEXT: [[SHIFT:%.*]] = lshr <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHIFT]], +; CHECK-NEXT: [[SHIFT:%.*]] = lshr <2 x i32> [[A0:%.*]], splat (i32 3) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHIFT]], splat (i32 1) ; CHECK-NEXT: [[NEG:%.*]] = sub nsw <2 x i32> zeroinitializer, [[MASK]] ; CHECK-NEXT: call void @usev2i32(<2 x i32> [[SHIFT]]) ; CHECK-NEXT: ret <2 x i32> [[NEG]] @@ -200,7 +200,7 @@ define <2 x i64> @neg_signbit_use1(<2 x i32> %x) { ; CHECK-LABEL: @neg_signbit_use1( ; CHECK-NEXT: [[S:%.*]] = lshr <2 x i32> [[X:%.*]], ; CHECK-NEXT: call void @usev2i32(<2 x i32> [[S]]) -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/nested-select.ll b/llvm/test/Transforms/InstCombine/nested-select.ll index d01dcf0793ade2..b8a7d4e8ab7894 100644 --- a/llvm/test/Transforms/InstCombine/nested-select.ll +++ b/llvm/test/Transforms/InstCombine/nested-select.ll @@ -515,7 +515,7 @@ define i8 @test_implied_true(i8 %x) { define <2 x i8> @test_implied_true_vec(<2 x i8> %x) { ; CHECK-LABEL: @test_implied_true_vec( ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i8> zeroinitializer, <2 x i8> +; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 20) ; CHECK-NEXT: ret <2 x i8> [[SEL2]] ; %cmp1 = icmp slt <2 x i8> %x, @@ -570,10 +570,10 @@ define i8 @test_imply_fail(i8 %x) { define <2 x i8> @test_imply_type_mismatch(<2 x i8> %x, i8 %y) { ; CHECK-LABEL: @test_imply_type_mismatch( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 10) ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0 -; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i8> zeroinitializer, <2 x i8> -; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], <2 x i8> [[SEL1]], <2 x i8> +; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 5) +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], <2 x i8> [[SEL1]], <2 x i8> splat (i8 20) ; CHECK-NEXT: ret <2 x i8> [[SEL2]] ; %cmp1 = icmp slt <2 x i8> %x, @@ -584,6 +584,12 @@ define <2 x i8> @test_imply_type_mismatch(<2 x i8> %x, i8 %y) { } define <4 x i1> @test_dont_crash(i1 %cond, <4 x i1> %a, <4 x i1> %b) { +; CHECK-LABEL: @test_dont_crash( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <4 x i1> [[A:%.*]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[AND:%.*]] = and <4 x i1> [[SEL]], [[B:%.*]] +; CHECK-NEXT: ret <4 x i1> [[AND]] +; entry: %sel = select i1 %cond, <4 x i1> %a, <4 x i1> zeroinitializer %and = and <4 x i1> %sel, %b diff --git a/llvm/test/Transforms/InstCombine/not.ll b/llvm/test/Transforms/InstCombine/not.ll index 3679976d9dc393..d693b9d8f85578 100644 --- a/llvm/test/Transforms/InstCombine/not.ll +++ b/llvm/test/Transforms/InstCombine/not.ll @@ -73,7 +73,7 @@ define i1 @not_cmp_constant(i32 %a) { define <2 x i1> @not_cmp_constant_vector(<2 x i32> %a) { ; CHECK-LABEL: @not_cmp_constant_vector( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 -43) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %nota = xor <2 x i32> %a, @@ -114,7 +114,7 @@ define i8 @not_ashr_const(i8 %x) { define <2 x i8> @not_ashr_const_splat(<2 x i8> %x) { ; CHECK-LABEL: @not_ashr_const_splat( -; CHECK-NEXT: [[NOT:%.*]] = lshr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = lshr <2 x i8> splat (i8 41), [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[NOT]] ; %shr = ashr <2 x i8> , %x @@ -147,7 +147,7 @@ define i8 @not_lshr_const(i8 %x) { define <2 x i8> @not_lshr_const_splat(<2 x i8> %x) { ; CHECK-LABEL: @not_lshr_const_splat( -; CHECK-NEXT: [[NOT:%.*]] = ashr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = ashr <2 x i8> splat (i8 -43), [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[NOT]] ; %shr = lshr <2 x i8> , %x @@ -180,7 +180,7 @@ define i32 @not_sub_extra_use(i32 %y, ptr %p) { define <2 x i32> @not_sub_splat(<2 x i32> %y) { ; CHECK-LABEL: @not_sub_splat( -; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 -124) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = sub <2 x i32> , %y @@ -190,9 +190,9 @@ define <2 x i32> @not_sub_splat(<2 x i32> %y) { define <2 x i32> @not_sub_extra_use_splat(<2 x i32> %y, ptr %p) { ; CHECK-LABEL: @not_sub_extra_use_splat( -; CHECK-NEXT: [[S:%.*]] = sub <2 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[S:%.*]] = sub <2 x i32> splat (i32 123), [[Y:%.*]] ; CHECK-NEXT: store <2 x i32> [[S]], ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y]], +; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y]], splat (i32 -124) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = sub <2 x i32> , %y @@ -238,7 +238,7 @@ define i32 @not_add(i32 %x) { define <2 x i32> @not_add_splat(<2 x i32> %x) { ; CHECK-LABEL: @not_add_splat( -; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> splat (i32 -124), [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = add <2 x i32> %x, @@ -424,8 +424,8 @@ define i8 @not_or_neg(i8 %x, i8 %y) { define <3 x i5> @not_or_neg_commute_vec(<3 x i5> %x, <3 x i5> %p) { ; CHECK-LABEL: @not_or_neg_commute_vec( ; CHECK-NEXT: [[Y:%.*]] = mul <3 x i5> [[P:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = add <3 x i5> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <3 x i5> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = add <3 x i5> [[X:%.*]], splat (i5 -1) +; CHECK-NEXT: [[TMP2:%.*]] = xor <3 x i5> [[Y]], splat (i5 -1) ; CHECK-NEXT: [[NOT:%.*]] = and <3 x i5> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <3 x i5> [[NOT]] ; @@ -527,8 +527,8 @@ define i1 @not_select_bool_const4(i1 %x, i1 %y) { define <2 x i1> @not_logicalAnd_not_op0(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @not_logicalAnd_not_op0( -; CHECK-NEXT: [[Y_NOT:%.*]] = xor <2 x i1> [[Y:%.*]], -; CHECK-NEXT: [[NOTAND:%.*]] = select <2 x i1> [[X:%.*]], <2 x i1> , <2 x i1> [[Y_NOT]] +; CHECK-NEXT: [[Y_NOT:%.*]] = xor <2 x i1> [[Y:%.*]], splat (i1 true) +; CHECK-NEXT: [[NOTAND:%.*]] = select <2 x i1> [[X:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[Y_NOT]] ; CHECK-NEXT: ret <2 x i1> [[NOTAND]] ; %notx = xor <2 x i1> %x, @@ -583,7 +583,7 @@ define i1 @not_logicalAnd_not_op0_use2(i1 %x, i1 %y) { define <2 x i1> @not_logicalOr_not_op0(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @not_logicalOr_not_op0( -; CHECK-NEXT: [[Y_NOT:%.*]] = xor <2 x i1> [[Y:%.*]], +; CHECK-NEXT: [[Y_NOT:%.*]] = xor <2 x i1> [[Y:%.*]], splat (i1 true) ; CHECK-NEXT: [[NOTOR:%.*]] = select <2 x i1> [[X:%.*]], <2 x i1> [[Y_NOT]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[NOTOR]] ; @@ -641,7 +641,7 @@ define i1 @not_logicalOr_not_op0_use2(i1 %x, i1 %y) { define <2 x i64> @bitcast_to_wide_elts_sext_bool(<4 x i1> %b) { ; CHECK-LABEL: @bitcast_to_wide_elts_sext_bool( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[NOT:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[NOT]] @@ -654,7 +654,7 @@ define <2 x i64> @bitcast_to_wide_elts_sext_bool(<4 x i1> %b) { define <8 x i16> @bitcast_to_narrow_elts_sext_bool(<4 x i1> %b) { ; CHECK-LABEL: @bitcast_to_narrow_elts_sext_bool( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[NOT:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x i16> ; CHECK-NEXT: ret <8 x i16> [[NOT]] @@ -680,7 +680,7 @@ define <2 x i16> @bitcast_to_vec_sext_bool(i1 %b) { define i128 @bitcast_to_scalar_sext_bool(<4 x i1> %b) { ; CHECK-LABEL: @bitcast_to_scalar_sext_bool( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[B:%.*]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[NOT:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 ; CHECK-NEXT: ret i128 [[NOT]] @@ -698,7 +698,7 @@ define <2 x i4> @bitcast_to_vec_sext_bool_use1(i1 %b) { ; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[B:%.*]] to i8 ; CHECK-NEXT: call void @use8(i8 [[SEXT]]) ; CHECK-NEXT: [[BC:%.*]] = bitcast i8 [[SEXT]] to <2 x i4> -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[BC]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[BC]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %sext = sext i1 %b to i8 diff --git a/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll index 8a508af632962a..787f8c7fd91701 100644 --- a/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll @@ -103,7 +103,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], splat (i32 17) ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> @@ -119,7 +119,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], +; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], splat (i32 17) ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/nsw.ll b/llvm/test/Transforms/InstCombine/nsw.ll index 6ced39a88c0a6a..329a47324f8623 100644 --- a/llvm/test/Transforms/InstCombine/nsw.ll +++ b/llvm/test/Transforms/InstCombine/nsw.ll @@ -103,7 +103,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], splat (i32 17) ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> @@ -119,7 +119,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], +; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], splat (i32 17) ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll index 3fd4a17d972af4..cd795aeeb212e3 100644 --- a/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -72,7 +72,7 @@ define i1 @p3_scalar_shifted2_urem_by_const(i32 %x, i32 %y) { define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p4_vector_urem_by_const__splat( -; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[T2]] ; @@ -111,7 +111,7 @@ define <4 x i1> @p6_vector_urem_by_const__nonsplat_poison0(<4 x i32> %x, <4 x i3 define <4 x i1> @p7_vector_urem_by_const__nonsplat_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @p7_vector_urem_by_const__nonsplat_poison2( -; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], ; CHECK-NEXT: ret <4 x i1> [[T2]] ; diff --git a/llvm/test/Transforms/InstCombine/onehot_merge.ll b/llvm/test/Transforms/InstCombine/onehot_merge.ll index d8ef66a4dd7818..2e57597455c2cd 100644 --- a/llvm/test/Transforms/InstCombine/onehot_merge.ll +++ b/llvm/test/Transforms/InstCombine/onehot_merge.ll @@ -31,8 +31,8 @@ define i1 @and_consts_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @and_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @and_consts_vector( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], -; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], splat (i32 12) +; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 12) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %t1 = and <2 x i32> , %k @@ -84,8 +84,8 @@ define i1 @foo1_and_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_and_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_and_vector( -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]] @@ -148,8 +148,8 @@ define i1 @foo1_and_commuted_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_and_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_and_commuted_vector( ; CHECK-NEXT: [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]] -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K2]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]] @@ -196,8 +196,8 @@ define i1 @or_consts_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @or_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @or_consts_vector( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], -; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], splat (i32 12) +; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 12) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %t1 = and <2 x i32> , %k @@ -249,8 +249,8 @@ define i1 @foo1_or_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_or_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_or_vector( -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]] @@ -313,8 +313,8 @@ define i1 @foo1_or_commuted_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_or_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_or_commuted_vector( ; CHECK-NEXT: [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]] -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K2]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]] @@ -372,8 +372,8 @@ define i1 @foo1_and_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_and_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_and_signbit_lshr_vector( -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = lshr exact <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = lshr exact <2 x i32> splat (i32 -2147483648), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]] @@ -430,8 +430,8 @@ define i1 @foo1_or_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) { define <2 x i1> @foo1_or_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) { ; CHECK-LABEL: @foo1_or_signbit_lshr_vector( -; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> , [[C1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = lshr exact <2 x i32> , [[C2:%.*]] +; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]] +; CHECK-NEXT: [[T4:%.*]] = lshr exact <2 x i32> splat (i32 -2147483648), [[C2:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]] ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]] ; CHECK-NEXT: [[OR:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]] diff --git a/llvm/test/Transforms/InstCombine/operand-complexity.ll b/llvm/test/Transforms/InstCombine/operand-complexity.ll index 541a15275b6170..491bf950bf435a 100644 --- a/llvm/test/Transforms/InstCombine/operand-complexity.ll +++ b/llvm/test/Transforms/InstCombine/operand-complexity.ll @@ -60,7 +60,7 @@ define i8 @not(i8 %x) { define <2 x i8> @not_vec(<2 x i8> %x) { ; CHECK-LABEL: @not_vec( ; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i8> [[X]], +; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i8> [[X]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[BO]], [[NOTX]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/or-concat.ll b/llvm/test/Transforms/InstCombine/or-concat.ll index dfc3f0631773a3..deeaea33af9f12 100644 --- a/llvm/test/Transforms/InstCombine/or-concat.ll +++ b/llvm/test/Transforms/InstCombine/or-concat.ll @@ -65,7 +65,7 @@ define i64 @concat_bswap32_unary_flip(i64 %a0) { define <2 x i64> @concat_bswap32_unary_flip_vector(<2 x i64> %a0) { ; CHECK-LABEL: @concat_bswap32_unary_flip_vector( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> splat (i64 32)) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]]) ; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; @@ -103,7 +103,7 @@ define <2 x i64> @concat_bswap32_binary_vector(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @concat_bswap32_binary_vector( ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], splat (i64 32) ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i64> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP4]]) ; CHECK-NEXT: ret <2 x i64> [[TMP5]] @@ -176,7 +176,7 @@ define i64 @concat_bitreverse32_unary_flip(i64 %a0) { define <2 x i64> @concat_bitreverse32_unary_flip_vector(<2 x i64> %a0) { ; CHECK-LABEL: @concat_bitreverse32_unary_flip_vector( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> splat (i64 32)) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]]) ; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; @@ -214,7 +214,7 @@ define <2 x i64> @concat_bitreverse32_binary_vector(<2 x i32> %a0, <2 x i32> %a1 ; CHECK-LABEL: @concat_bitreverse32_binary_vector( ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], splat (i64 32) ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i64> [[TMP3]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP4]]) ; CHECK-NEXT: ret <2 x i64> [[TMP5]] diff --git a/llvm/test/Transforms/InstCombine/or-xor.ll b/llvm/test/Transforms/InstCombine/or-xor.ll index f4ddbb5abc4639..b05ff15b8b3c84 100644 --- a/llvm/test/Transforms/InstCombine/or-xor.ll +++ b/llvm/test/Transforms/InstCombine/or-xor.ll @@ -81,7 +81,7 @@ define i32 @test5(i32 %x, i32 %y) { define <2 x i4> @test5_commuted(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @test5_commuted( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[Z:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[Z:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[Z]] ; %xor = xor <2 x i4> %x, %y @@ -437,7 +437,7 @@ define i9 @or_and_xor_not_constant_commute1(i9 %a, i9 %b) { define <2 x i9> @or_and_xor_not_constant_commute2_splat(<2 x i9> %a, <2 x i9> %b) { ; CHECK-LABEL: @or_and_xor_not_constant_commute2_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i9> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i9> [[A:%.*]], splat (i9 42) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i9> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret <2 x i9> [[XOR]] ; @@ -450,7 +450,7 @@ define <2 x i9> @or_and_xor_not_constant_commute2_splat(<2 x i9> %a, <2 x i9> %b define <2 x i9> @or_and_xor_not_constant_commute3_splat(<2 x i9> %a, <2 x i9> %b) { ; CHECK-LABEL: @or_and_xor_not_constant_commute3_splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i9> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i9> [[A:%.*]], splat (i9 42) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i9> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: ret <2 x i9> [[XOR]] ; @@ -862,7 +862,7 @@ define i8 @or_not_xor_common_op_commute3(i8 %x, i8 %y, i8 %p) { define <2 x i4> @or_not_xor_common_op_commute4(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { ; CHECK-LABEL: @or_not_xor_common_op_commute4( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 -1) ; CHECK-NEXT: [[O2:%.*]] = or <2 x i4> [[Z:%.*]], [[NAND]] ; CHECK-NEXT: ret <2 x i4> [[O2]] ; @@ -975,7 +975,7 @@ define <2 x i4> @or_nand_xor_common_op_commute1(<2 x i4> %x, <2 x i4> %y, <2 x i ; CHECK-LABEL: @or_nand_xor_common_op_commute1( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[Z:%.*]], [[X:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[AND]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %and = and <2 x i4> %z, %x diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 9bcad034b363e7..4a886afd78a5f0 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -135,8 +135,8 @@ define i1 @test18_logical(i32 %A) { define <2 x i1> @test18vec(<2 x i32> %A) { ; CHECK-LABEL: @test18vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -100) +; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -50) ; CHECK-NEXT: ret <2 x i1> [[D]] ; %B = icmp sge <2 x i32> %A, @@ -196,8 +196,8 @@ define i16 @test23(i16 %A) { define <2 x i16> @test23vec(<2 x i16> %A) { ; CHECK-LABEL: @test23vec( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i16> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = xor <2 x i16> [[B]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 1) +; CHECK-NEXT: [[D:%.*]] = xor <2 x i16> [[B]], splat (i16 -24575) ; CHECK-NEXT: ret <2 x i16> [[D]] ; %B = lshr <2 x i16> %A, @@ -400,8 +400,8 @@ define i32 @test30(i32 %A) { define <2 x i32> @test30vec(<2 x i32> %A) { ; CHECK-LABEL: @test30vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[E:%.*]] = or disjoint <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -58312) +; CHECK-NEXT: [[E:%.*]] = or disjoint <2 x i32> [[TMP1]], splat (i32 32962) ; CHECK-NEXT: ret <2 x i32> [[E]] ; %B = or <2 x i32> %A, @@ -430,8 +430,8 @@ define i64 @test31(i64 %A) { define <2 x i64> @test31vec(<2 x i64> %A) { ; CHECK-LABEL: @test31vec( -; CHECK-NEXT: [[E:%.*]] = and <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[F:%.*]] = or disjoint <2 x i64> [[E]], +; CHECK-NEXT: [[E:%.*]] = and <2 x i64> [[A:%.*]], splat (i64 4294908984) +; CHECK-NEXT: [[F:%.*]] = or disjoint <2 x i64> [[E]], splat (i64 32962) ; CHECK-NEXT: ret <2 x i64> [[F]] ; %B = or <2 x i64> %A, @@ -555,8 +555,8 @@ define i1 @test37_logical(i32 %x) { define <2 x i1> @test37_uniform(<2 x i32> %x) { ; CHECK-LABEL: @test37_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[RET1:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 7) +; CHECK-NEXT: [[RET1:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: ret <2 x i1> [[RET1]] ; %add1 = add <2 x i32> %x, @@ -861,9 +861,9 @@ define i1 @test46_logical(i8 signext %c) { define <2 x i1> @test46_uniform(<2 x i8> %c) { ; CHECK-LABEL: @test46_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[C:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], -; CHECK-NEXT: [[OR:%.*]] = icmp ult <2 x i8> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[C:%.*]], splat (i8 -33) +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 -65) +; CHECK-NEXT: [[OR:%.*]] = icmp ult <2 x i8> [[TMP2]], splat (i8 26) ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %c.off = add <2 x i8> %c, @@ -940,9 +940,9 @@ define i1 @test47_logical(i8 signext %c) { define <2 x i1> @test47_nonuniform(<2 x i8> %c) { ; CHECK-LABEL: @test47_nonuniform( ; CHECK-NEXT: [[C_OFF:%.*]] = add <2 x i8> [[C:%.*]], -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i8> [[C_OFF]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i8> [[C_OFF]], splat (i8 27) ; CHECK-NEXT: [[C_OFF17:%.*]] = add <2 x i8> [[C]], -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i8> [[C_OFF17]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i8> [[C_OFF17]], splat (i8 27) ; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[CMP1]], [[CMP2]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; @@ -966,7 +966,7 @@ define i32 @test49(i1 %C) { define <2 x i32> @test49vec(i1 %C) { ; CHECK-LABEL: @test49vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1019), <2 x i32> splat (i32 123) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -1013,7 +1013,7 @@ define <2 x i32> @test50vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 1019), [[ENTRY:%.*]] ], [ splat (i32 123), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A]] ; entry: @@ -1404,7 +1404,7 @@ define i32 @test3(i32 %x, i32 %y) { define <2 x i32> @test4_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test4_vec( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[OR1]] ; %or = or <2 x i32> %y, %x @@ -1479,7 +1479,7 @@ define i8 @ashr_bitwidth_mask(i8 %x, i8 %y) { define <2 x i8> @ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) { ; CHECK-LABEL: @ashr_bitwidth_mask_vec_commute( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], -; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: [[R:%.*]] = or <2 x i8> [[Y]], [[SIGN]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1538,7 +1538,7 @@ define i1 @cmp_overlap(i32 %x) { define <2 x i1> @cmp_overlap_splat(<2 x i5> %x) { ; CHECK-LABEL: @cmp_overlap_splat( -; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i5> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i5> [[X:%.*]], splat (i5 1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %isneg = icmp slt <2 x i5> %x, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/overflow-mul.ll b/llvm/test/Transforms/InstCombine/overflow-mul.ll index 6b5a65c03ee102..1d18d9ffd46d26 100644 --- a/llvm/test/Transforms/InstCombine/overflow-mul.ll +++ b/llvm/test/Transforms/InstCombine/overflow-mul.ll @@ -216,7 +216,7 @@ define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: [[VMOVL_I_I726:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[VMOVL_I_I712:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32> ; CHECK-NEXT: [[MUL_I703:%.*]] = mul nuw <4 x i32> [[VMOVL_I_I712]], [[VMOVL_I_I726]] -; CHECK-NEXT: [[TMP:%.*]] = icmp sgt <4 x i32> [[MUL_I703]], +; CHECK-NEXT: [[TMP:%.*]] = icmp sgt <4 x i32> [[MUL_I703]], splat (i32 -1) ; CHECK-NEXT: [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[VCGEZ_I]] ; diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll index 478e6bd5878d5a..ab2584add4da7e 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll @@ -56,11 +56,11 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nuw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i64> [[T2]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i64> [[T2]], splat (i64 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) @@ -68,7 +68,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, @@ -128,9 +128,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nuw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i64> [[T2]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i64> [[T2]], splat (i64 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll index 469375633b60e1..3db87b00c1a901 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -56,11 +56,11 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], splat (i64 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) @@ -68,7 +68,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, @@ -128,9 +128,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], splat (i64 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll index 1a711e58c333be..e16aac9b912b25 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -49,14 +49,14 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -33) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll index cd0098ecdb0a6a..ebfa456cf0dd83 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -53,16 +53,16 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -33) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]] -; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> @@ -85,7 +85,7 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -116,7 +116,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll index 9a4a5dd890eec2..5dda49b280e191 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll @@ -50,13 +50,13 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -33) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll index a9fafdbf7b8db7..4a0752f3d0e0ee 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll @@ -46,16 +46,16 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[T1]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1) +; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i32> splat (i32 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[T1]], splat (i32 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, @@ -101,9 +101,9 @@ define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[T1]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i32> splat (i32 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[T1]], splat (i32 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll index bce2a1c3f7e505..a6c3f334172654 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll @@ -46,16 +46,16 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1) +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], splat (i32 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, @@ -101,9 +101,9 @@ define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], splat (i32 -1) +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll index 55d0b3f80a519b..cc2a33e43cbf1b 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll @@ -38,12 +38,12 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -1) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = lshr <8 x i32> , %nbits @@ -76,7 +76,7 @@ define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t1_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS:%.*]] ; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll index 7ad99a6bb0a38f..eff9bcb2ddb783 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll @@ -42,14 +42,14 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t2_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> , [[NBITS]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -1) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]] -; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T4]] ; %t0 = shl <8 x i32> , %nbits @@ -66,7 +66,7 @@ define <8 x i32> @t2_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) @@ -88,8 +88,8 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll index 7be65330013129..66a46aeb908b54 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll @@ -39,11 +39,11 @@ declare void @use8xi32(<8 x i32>) define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = shl <8 x i32> [[X:%.*]], [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -1) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647) ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = shl <8 x i32> %x, %nbits diff --git a/llvm/test/Transforms/InstCombine/pow-0.ll b/llvm/test/Transforms/InstCombine/pow-0.ll index 01c03df8c6b545..26a54c6850e2ab 100644 --- a/llvm/test/Transforms/InstCombine/pow-0.ll +++ b/llvm/test/Transforms/InstCombine/pow-0.ll @@ -40,7 +40,7 @@ define double @fast_minus_zero(double %value) { define <2 x double> @vec_zero(<2 x double> %value) { ; CHECK-LABEL: define <2 x double> @vec_zero( ; CHECK-SAME: <2 x double> [[VALUE:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %res = call <2 x double> @llvm.pow.v2f64(<2 x double> %value, <2 x double> ) ret <2 x double> %res @@ -49,7 +49,7 @@ define <2 x double> @vec_zero(<2 x double> %value) { define <2 x double> @vec_minus_zero(<2 x double> %value) { ; CHECK-LABEL: define <2 x double> @vec_minus_zero( ; CHECK-SAME: <2 x double> [[VALUE:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %res = call <2 x double> @llvm.pow.v2f64(<2 x double> %value, <2 x double> ) ret <2 x double> %res @@ -58,7 +58,7 @@ define <2 x double> @vec_minus_zero(<2 x double> %value) { define <2 x double> @vec_fast_zero(<2 x double> %value) { ; CHECK-LABEL: define <2 x double> @vec_fast_zero( ; CHECK-SAME: <2 x double> [[VALUE:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %res = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %value, <2 x double> ) ret <2 x double> %res @@ -67,7 +67,7 @@ define <2 x double> @vec_fast_zero(<2 x double> %value) { define <2 x double> @vec_fast_minus_zero(<2 x double> %value) { ; CHECK-LABEL: define <2 x double> @vec_fast_minus_zero( ; CHECK-SAME: <2 x double> [[VALUE:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %res = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %value, <2 x double> ) ret <2 x double> %res diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll index 91e909aa25c7e3..c23d15d16a34ba 100644 --- a/llvm/test/Transforms/InstCombine/pow-1.ll +++ b/llvm/test/Transforms/InstCombine/pow-1.ll @@ -97,7 +97,7 @@ define float @test_simplify1_noerrno(float %x) { define <2 x float> @test_simplify1v(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_simplify1v( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %retval @@ -134,7 +134,7 @@ define double @test_simplify2_noerrno(double %x) { define <2 x double> @test_simplify2v(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @test_simplify2v( ; CHECK-SAME: <2 x double> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) ret <2 x double> %retval @@ -253,22 +253,22 @@ define <2 x float> @test_simplify3v(<2 x float> %x) { ; ; VC32-LABEL: define <2 x float> @test_simplify3v( ; VC32-SAME: <2 x float> [[X:%.*]]) { -; VC32-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC32-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]]) ; VC32-NEXT: ret <2 x float> [[RETVAL]] ; ; VC19-LABEL: define <2 x float> @test_simplify3v( ; VC19-SAME: <2 x float> [[X:%.*]]) { -; VC19-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC19-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]]) ; VC19-NEXT: ret <2 x float> [[RETVAL]] ; ; VC64-LABEL: define <2 x float> @test_simplify3v( ; VC64-SAME: <2 x float> [[X:%.*]]) { -; VC64-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC64-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]]) ; VC64-NEXT: ret <2 x float> [[RETVAL]] ; ; NOLIB-LABEL: define <2 x float> @test_simplify3v( ; NOLIB-SAME: <2 x float> [[X:%.*]]) { -; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]]) ; NOLIB-NEXT: ret <2 x float> [[RETVAL]] ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) @@ -279,28 +279,28 @@ define <2 x float> @test_simplify3v(<2 x float> %x) { define <2 x double> @test_simplify3vn(<2 x double> %x) { ; ANY-LABEL: define <2 x double> @test_simplify3vn( ; ANY-SAME: <2 x double> [[X:%.*]]) { -; ANY-NEXT: [[MUL:%.*]] = fmul <2 x double> [[X]], +; ANY-NEXT: [[MUL:%.*]] = fmul <2 x double> [[X]], splat (double 2.000000e+00) ; ANY-NEXT: [[EXP2:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]]) ; ANY-NEXT: ret <2 x double> [[EXP2]] ; ; VC32-LABEL: define <2 x double> @test_simplify3vn( ; VC32-SAME: <2 x double> [[X:%.*]]) { -; VC32-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC32-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]]) ; VC32-NEXT: ret <2 x double> [[RETVAL]] ; ; VC19-LABEL: define <2 x double> @test_simplify3vn( ; VC19-SAME: <2 x double> [[X:%.*]]) { -; VC19-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC19-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]]) ; VC19-NEXT: ret <2 x double> [[RETVAL]] ; ; VC64-LABEL: define <2 x double> @test_simplify3vn( ; VC64-SAME: <2 x double> [[X:%.*]]) { -; VC64-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC64-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]]) ; VC64-NEXT: ret <2 x double> [[RETVAL]] ; ; NOLIB-LABEL: define <2 x double> @test_simplify3vn( ; NOLIB-SAME: <2 x double> [[X:%.*]]) { -; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]]) ; NOLIB-NEXT: ret <2 x double> [[RETVAL]] ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) @@ -450,22 +450,22 @@ define <2 x double> @test_simplify4v(<2 x double> %x) { ; ; VC32-LABEL: define <2 x double> @test_simplify4v( ; VC32-SAME: <2 x double> [[X:%.*]]) { -; VC32-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC32-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]]) ; VC32-NEXT: ret <2 x double> [[RETVAL]] ; ; VC19-LABEL: define <2 x double> @test_simplify4v( ; VC19-SAME: <2 x double> [[X:%.*]]) { -; VC19-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC19-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]]) ; VC19-NEXT: ret <2 x double> [[RETVAL]] ; ; VC64-LABEL: define <2 x double> @test_simplify4v( ; VC64-SAME: <2 x double> [[X:%.*]]) { -; VC64-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; VC64-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]]) ; VC64-NEXT: ret <2 x double> [[RETVAL]] ; ; NOLIB-LABEL: define <2 x double> @test_simplify4v( ; NOLIB-SAME: <2 x double> [[X:%.*]]) { -; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]]) ; NOLIB-NEXT: ret <2 x double> [[RETVAL]] ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) @@ -482,22 +482,22 @@ define <2 x float> @test_simplify4vn(<2 x float> %x) { ; ; VC32-LABEL: define <2 x float> @test_simplify4vn( ; VC32-SAME: <2 x float> [[X:%.*]]) { -; VC32-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC32-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]]) ; VC32-NEXT: ret <2 x float> [[RETVAL]] ; ; VC19-LABEL: define <2 x float> @test_simplify4vn( ; VC19-SAME: <2 x float> [[X:%.*]]) { -; VC19-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC19-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]]) ; VC19-NEXT: ret <2 x float> [[RETVAL]] ; ; VC64-LABEL: define <2 x float> @test_simplify4vn( ; VC64-SAME: <2 x float> [[X:%.*]]) { -; VC64-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; VC64-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]]) ; VC64-NEXT: ret <2 x float> [[RETVAL]] ; ; NOLIB-LABEL: define <2 x float> @test_simplify4vn( ; NOLIB-SAME: <2 x float> [[X:%.*]]) { -; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; NOLIB-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]]) ; NOLIB-NEXT: ret <2 x float> [[RETVAL]] ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) @@ -573,7 +573,7 @@ define float @test_simplify5_noerrno(float %x) { define <2 x float> @test_simplify5v(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_simplify5v( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) ret <2 x float> %retval @@ -610,7 +610,7 @@ define double @test_simplify6_noerrno(double %x) { define <2 x double> @test_simplify6v(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @test_simplify6v( ; CHECK-SAME: <2 x double> [[X:%.*]]) { -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 1.000000e+00) ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %retval @@ -1317,7 +1317,7 @@ define float @pow_neg1_strict_noerrno(float %x) { define <2 x float> @pow_neg1_strictv(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @pow_neg1_strictv( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv <2 x float> , [[X]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x float> [[RECIPROCAL]] ; %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> ) @@ -1357,7 +1357,7 @@ define double @pow_neg1_double_fast_noerrno(double %x) { define <2 x double> @pow_neg1_double_fastv(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @pow_neg1_double_fastv( ; CHECK-SAME: <2 x double> [[X:%.*]]) { -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> , [[X]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> splat (double 1.000000e+00), [[X]] ; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) @@ -1533,7 +1533,7 @@ define bfloat @test_pow_10_bf16(bfloat %x) { define <2 x half> @test_pow_10_v2f16(<2 x half> %x) { ; CHECK-LABEL: define <2 x half> @test_pow_10_v2f16( ; CHECK-SAME: <2 x half> [[X:%.*]]) { -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> , <2 x half> [[X]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> splat (half 0xH4900), <2 x half> [[X]]) ; CHECK-NEXT: ret <2 x half> [[RETVAL]] ; %retval = call <2 x half> @llvm.pow.v2f16(<2 x half> , <2 x half> %x) @@ -1543,7 +1543,7 @@ define <2 x half> @test_pow_10_v2f16(<2 x half> %x) { define <2 x float> @test_pow_10_v2f32(<2 x float> %x) { ; CHECK-LABEL: define <2 x float> @test_pow_10_v2f32( ; CHECK-SAME: <2 x float> [[X:%.*]]) { -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[X]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 1.000000e+01), <2 x float> [[X]]) ; CHECK-NEXT: ret <2 x float> [[RETVAL]] ; %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> %x) @@ -1553,7 +1553,7 @@ define <2 x float> @test_pow_10_v2f32(<2 x float> %x) { define <2 x double> @test_pow_10_v2f64(<2 x double> %x) { ; CHECK-LABEL: define <2 x double> @test_pow_10_v2f64( ; CHECK-SAME: <2 x double> [[X:%.*]]) { -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[X]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 1.000000e+01), <2 x double> [[X]]) ; CHECK-NEXT: ret <2 x double> [[RETVAL]] ; %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> %x) @@ -1563,7 +1563,7 @@ define <2 x double> @test_pow_10_v2f64(<2 x double> %x) { define <2 x bfloat> @test_pow_10_v2bf16(<2 x bfloat> %x) { ; CHECK-LABEL: define <2 x bfloat> @test_pow_10_v2bf16( ; CHECK-SAME: <2 x bfloat> [[X:%.*]]) { -; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x bfloat> @llvm.pow.v2bf16(<2 x bfloat> , <2 x bfloat> [[X]]) +; CHECK-NEXT: [[RETVAL:%.*]] = call <2 x bfloat> @llvm.pow.v2bf16(<2 x bfloat> splat (bfloat 0xR4120), <2 x bfloat> [[X]]) ; CHECK-NEXT: ret <2 x bfloat> [[RETVAL]] ; %retval = call <2 x bfloat> @llvm.pow.v2bf16(<2 x bfloat> , <2 x bfloat> %x) diff --git a/llvm/test/Transforms/InstCombine/pow-sqrt.ll b/llvm/test/Transforms/InstCombine/pow-sqrt.ll index 9272c1c33a953c..7eef670ccea3dc 100644 --- a/llvm/test/Transforms/InstCombine/pow-sqrt.ll +++ b/llvm/test/Transforms/InstCombine/pow-sqrt.ll @@ -51,8 +51,8 @@ define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_half_approx( ; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], -; CHECK-NEXT: [[POW:%.*]] = select afn <2 x i1> [[ISINF]], <2 x double> , <2 x double> [[ABS]] +; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], splat (double 0xFFF0000000000000) +; CHECK-NEXT: [[POW:%.*]] = select afn <2 x i1> [[ISINF]], <2 x double> splat (double 0x7FF0000000000000), <2 x double> [[ABS]] ; CHECK-NEXT: ret <2 x double> [[POW]] ; %pow = call afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) @@ -206,7 +206,7 @@ define float @pow_libcall_neghalf_afn(float %x) { define <2 x double> @pow_intrinsic_neghalf_no_FMF(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_no_FMF( -; CHECK-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> ) +; CHECK-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> splat (double -5.000000e-01)) ; CHECK-NEXT: ret <2 x double> [[POW]] ; %pow = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) @@ -221,8 +221,8 @@ define <2 x double> @pow_intrinsic_neghalf_reassoc(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_reassoc( ; CHECK-NEXT: [[SQRT:%.*]] = call reassoc <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call reassoc <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp reassoc oeq <2 x double> [[X]], -; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc <2 x double> , [[ABS]] +; CHECK-NEXT: [[ISINF:%.*]] = fcmp reassoc oeq <2 x double> [[X]], splat (double 0xFFF0000000000000) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc <2 x double> splat (double 1.000000e+00), [[ABS]] ; CHECK-NEXT: [[RECIPROCAL:%.*]] = select <2 x i1> [[ISINF]], <2 x double> zeroinitializer, <2 x double> [[TMP1]] ; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; @@ -238,8 +238,8 @@ define <2 x double> @pow_intrinsic_neghalf_afn(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_afn( ; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], -; CHECK-NEXT: [[TMP1:%.*]] = fdiv afn <2 x double> , [[ABS]] +; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], splat (double 0xFFF0000000000000) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv afn <2 x double> splat (double 1.000000e+00), [[ABS]] ; CHECK-NEXT: [[RECIPROCAL:%.*]] = select <2 x i1> [[ISINF]], <2 x double> zeroinitializer, <2 x double> [[TMP1]] ; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; @@ -264,7 +264,7 @@ define <2 x double> @pow_intrinsic_neghalf_ninf(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_ninf( ; CHECK-NEXT: [[SQRT:%.*]] = call ninf afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) ; CHECK-NEXT: [[ABS:%.*]] = call ninf afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf afn <2 x double> , [[ABS]] +; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf afn <2 x double> splat (double 1.000000e+00), [[ABS]] ; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] ; %pow = call afn ninf <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> ) diff --git a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll index d31b7c9fe28359..0c4418a3094f96 100644 --- a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll +++ b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll @@ -127,7 +127,7 @@ define half @pow_sitofp_f16_const_base_2(i32 %x) { define <2 x float> @pow_sitofp_v2f32_const_base_2(<2 x i32> %x) { ; CHECK-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[X]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[X]]) ; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x float> @@ -139,20 +139,20 @@ define <2 x float> @pow_sitofp_v2f32_const_base_8(<2 x i32> %x) { ; LDEXP-EXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_8( ; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-EXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float> -; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], +; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], splat (float 3.000000e+00) ; LDEXP-EXP2-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUL]]) ; LDEXP-EXP2-NEXT: ret <2 x float> [[EXP2]] ; ; LDEXP-NOEXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_8( ; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-NOEXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float> -; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x float> @llvm.pow.v2f32(<2 x float> , <2 x float> [[ITOFP]]) +; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 8.000000e+00), <2 x float> [[ITOFP]]) ; LDEXP-NOEXP2-NEXT: ret <2 x float> [[POW]] ; ; NOLDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_8( ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { ; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float> -; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], +; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], splat (float 3.000000e+00) ; NOLDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUL]]) ; NOLDEXP-NEXT: ret <2 x float> [[EXP2]] ; @@ -176,7 +176,7 @@ define <2 x float> @pow_sitofp_v2f32_const_base_mixed_2(<2 x i32> %x) { define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(<2 x i32> %x) { ; CHECK-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[X]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[X]]) ; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x float> @@ -198,7 +198,7 @@ define @pow_sitofp_nxv4f32_const_base_2( define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) { ; CHECK-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> , <2 x i32> [[X]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> splat (half 0xH3C00), <2 x i32> [[X]]) ; CHECK-NEXT: ret <2 x half> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x half> @@ -209,7 +209,7 @@ define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) { define <2 x double> @pow_sitofp_v2f64_const_base_2(<2 x i32> %x) { ; CHECK-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> , <2 x i32> [[X]]) +; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> splat (double 1.000000e+00), <2 x i32> [[X]]) ; CHECK-NEXT: ret <2 x double> [[EXP2]] ; %itofp = sitofp <2 x i32> %x to <2 x double> @@ -228,20 +228,20 @@ define <2 x half> @pow_sitofp_v2f16_const_base_8(<2 x i32> %x) { ; LDEXP-EXP2-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_8( ; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-EXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half> -; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], +; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], splat (half 0xH4200) ; LDEXP-EXP2-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.exp2.v2f16(<2 x half> [[MUL]]) ; LDEXP-EXP2-NEXT: ret <2 x half> [[EXP2]] ; ; LDEXP-NOEXP2-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_8( ; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-NOEXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half> -; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x half> @llvm.pow.v2f16(<2 x half> , <2 x half> [[ITOFP]]) +; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x half> @llvm.pow.v2f16(<2 x half> splat (half 0xH4800), <2 x half> [[ITOFP]]) ; LDEXP-NOEXP2-NEXT: ret <2 x half> [[POW]] ; ; NOLDEXP-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_8( ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { ; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half> -; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], +; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], splat (half 0xH4200) ; NOLDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x half> @llvm.exp2.v2f16(<2 x half> [[MUL]]) ; NOLDEXP-NEXT: ret <2 x half> [[EXP2]] ; @@ -261,20 +261,20 @@ define <2 x double> @pow_sitofp_v2f64_const_base_8(<2 x i32> %x) { ; LDEXP-EXP2-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_8( ; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-EXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double> -; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], +; LDEXP-EXP2-NEXT: [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], splat (double 3.000000e+00) ; LDEXP-EXP2-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]]) ; LDEXP-EXP2-NEXT: ret <2 x double> [[EXP2]] ; ; LDEXP-NOEXP2-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_8( ; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) { ; LDEXP-NOEXP2-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double> -; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x double> @llvm.pow.v2f64(<2 x double> , <2 x double> [[ITOFP]]) +; LDEXP-NOEXP2-NEXT: [[POW:%.*]] = tail call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 8.000000e+00), <2 x double> [[ITOFP]]) ; LDEXP-NOEXP2-NEXT: ret <2 x double> [[POW]] ; ; NOLDEXP-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_8( ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) { ; NOLDEXP-NEXT: [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double> -; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], +; NOLDEXP-NEXT: [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], splat (double 3.000000e+00) ; NOLDEXP-NEXT: [[EXP2:%.*]] = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]]) ; NOLDEXP-NEXT: ret <2 x double> [[EXP2]] ; diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll index e5546075edc9bb..4a75a84d7b66de 100644 --- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll @@ -531,7 +531,7 @@ define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(<2 x i8> %x) { ; CHECK-LABEL: define <2 x float> @pow_sitofp_const_base_2_no_fast_vector( ; CHECK-SAME: <2 x i8> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32> -; CHECK-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> , <2 x i32> [[TMP1]]) +; CHECK-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x float> [[EXP2]] ; %s = sitofp <2 x i8> %x to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/pr14365.ll b/llvm/test/Transforms/InstCombine/pr14365.ll index 3a09b55aba309d..1a51c174733778 100644 --- a/llvm/test/Transforms/InstCombine/pr14365.ll +++ b/llvm/test/Transforms/InstCombine/pr14365.ll @@ -16,7 +16,7 @@ define i32 @test0(i32 %a0) { define <4 x i32> @test0_vec(<4 x i32> %a0) { ; CHECK-LABEL: @test0_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], splat (i32 -1431655766) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = and <4 x i32> %a0, @@ -44,8 +44,8 @@ define i32 @test1(i32 %a0) { define <4 x i32> @test1_vec(<4 x i32> %a0) { ; CHECK-LABEL: @test1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], splat (i32 1431655765) ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <4 x i32> [[A0]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/pr17827.ll b/llvm/test/Transforms/InstCombine/pr17827.ll index 6c6110aa073a59..2f10bb5c7f25f0 100644 --- a/llvm/test/Transforms/InstCombine/pr17827.ll +++ b/llvm/test/Transforms/InstCombine/pr17827.ll @@ -49,9 +49,9 @@ define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) { define <2 x i1> @test_shift_and_cmp_changed1_vec(<2 x i8> %p, <2 x i8> %q) { ; CHECK-LABEL: @test_shift_and_cmp_changed1_vec( -; CHECK-NEXT: [[ANDP:%.*]] = shl <2 x i8> [[P:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[ANDP]], -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], +; CHECK-NEXT: [[ANDP:%.*]] = shl <2 x i8> [[P:%.*]], splat (i8 5) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[ANDP]], splat (i8 -64) +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], splat (i8 32) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %andp = and <2 x i8> %p, @@ -78,7 +78,7 @@ define i1 @test_shift_and_cmp_changed2(i8 %p) { define <2 x i1> @test_shift_and_cmp_changed2_vec(<2 x i8> %p) { ; CHECK-LABEL: @test_shift_and_cmp_changed2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], splat (i8 6) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/pr38984-inseltpoison.ll b/llvm/test/Transforms/InstCombine/pr38984-inseltpoison.ll index 92f55b211b6390..4cafc4b1d475c2 100644 --- a/llvm/test/Transforms/InstCombine/pr38984-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/pr38984-inseltpoison.ll @@ -9,7 +9,7 @@ target datalayout = "p:16:16" define <4 x i1> @PR38984_1() { ; CHECK-LABEL: @PR38984_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %0 = load i16, ptr @offsets, align 1 diff --git a/llvm/test/Transforms/InstCombine/pr38984.ll b/llvm/test/Transforms/InstCombine/pr38984.ll index a7eddcfbe0845d..9dfc2257dfe83b 100644 --- a/llvm/test/Transforms/InstCombine/pr38984.ll +++ b/llvm/test/Transforms/InstCombine/pr38984.ll @@ -9,7 +9,7 @@ target datalayout = "p:16:16" define <4 x i1> @PR38984_1() { ; CHECK-LABEL: @PR38984_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %0 = load i16, ptr @offsets, align 1 diff --git a/llvm/test/Transforms/InstCombine/pr53357.ll b/llvm/test/Transforms/InstCombine/pr53357.ll index 0ae690869c1c44..2292abd0c90688 100644 --- a/llvm/test/Transforms/InstCombine/pr53357.ll +++ b/llvm/test/Transforms/InstCombine/pr53357.ll @@ -20,7 +20,7 @@ define i32 @src(i32 noundef %0, i32 noundef %1) { define <2 x i32> @src_vec(<2 x i32> noundef %0, <2 x i32> noundef %1) { ; CHECK-LABEL: @src_vec( ; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1:%.*]], [[TMP0:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %3 = and <2 x i32> %1, %0 @@ -34,7 +34,7 @@ define <2 x i32> @src_vec(<2 x i32> noundef %0, <2 x i32> noundef %1) { define <2 x i32> @src_vec_poison(<2 x i32> noundef %0, <2 x i32> noundef %1) { ; CHECK-LABEL: @src_vec_poison( ; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1:%.*]], [[TMP0:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %3 = and <2 x i32> %1, %0 diff --git a/llvm/test/Transforms/InstCombine/pr98435.ll b/llvm/test/Transforms/InstCombine/pr98435.ll index b400801d342fe7..78c8e860bed72b 100644 --- a/llvm/test/Transforms/InstCombine/pr98435.ll +++ b/llvm/test/Transforms/InstCombine/pr98435.ll @@ -4,7 +4,7 @@ define <2 x i1> @pr98435(<2 x i1> %val) { ; CHECK-LABEL: define <2 x i1> @pr98435( ; CHECK-SAME: <2 x i1> [[VAL:%.*]]) { -; CHECK-NEXT: [[VAL1:%.*]] = select <2 x i1> , <2 x i1> , <2 x i1> [[VAL]] +; CHECK-NEXT: [[VAL1:%.*]] = select <2 x i1> , <2 x i1> splat (i1 true), <2 x i1> [[VAL]] ; CHECK-NEXT: ret <2 x i1> [[VAL1]] ; %val1 = select <2 x i1> , <2 x i1> , <2 x i1> %val diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll index 24777b1b7f2085..bcbd78e23ed67f 100644 --- a/llvm/test/Transforms/InstCombine/ptrmask.ll +++ b/llvm/test/Transforms/InstCombine/ptrmask.ll @@ -23,7 +23,7 @@ define ptr @ptrmask_combine_consecutive_preserve_attrs(ptr %p0, i64 %m1) { define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs(<2 x ptr> %p0, <2 x i64> %m1) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs ; CHECK-SAME: (<2 x ptr> [[P0:%.*]], <2 x i64> [[M1:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[M1]], splat (i64 12345) ; CHECK-NEXT: [[R:%.*]] = call align 128 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[TMP1]]) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; @@ -114,7 +114,7 @@ define ptr addrspace(1) @ptrmask_combine_add_alignment2(ptr addrspace(1) align 3 define <2 x ptr> @ptrmask_combine_add_alignment_vec(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_add_alignment_vec ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[R:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -96)) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -435,7 +435,7 @@ define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented(<2 x ptr> align 8 %p) ; CHECK-LABEL: define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented ; CHECK-SAME: (<2 x ptr> align 8 [[P:%.*]]) { ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, <2 x ptr> [[P]], i64 17 -; CHECK-NEXT: [[PM:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[GEP]], <2 x i64> ) +; CHECK-NEXT: [[PM:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[GEP]], <2 x i64> splat (i64 -96)) ; CHECK-NEXT: ret <2 x ptr> [[PM]] ; %gep = getelementptr i8, <2 x ptr> %p, i32 17 @@ -520,7 +520,7 @@ define ptr @ptrmask_is_useless4(i64 %i, i64 %m) { define <2 x ptr> @ptrmask_is_useless_vec(<2 x i64> %i, <2 x i64> %m) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_is_useless_vec ; CHECK-SAME: (<2 x i64> [[I:%.*]], <2 x i64> [[M:%.*]]) { -; CHECK-NEXT: [[I0:%.*]] = and <2 x i64> [[I]], +; CHECK-NEXT: [[I0:%.*]] = and <2 x i64> [[I]], splat (i64 31) ; CHECK-NEXT: [[P0:%.*]] = inttoptr <2 x i64> [[I0]] to <2 x ptr> ; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M]]) ; CHECK-NEXT: ret <2 x ptr> [[R]] diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll index e3bcfde4ad59ca..f505a0e99ec501 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll @@ -57,9 +57,9 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i64> [[T1]], -; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i64> [[T1]], splat (i64 -1) +; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -127,9 +127,9 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i64> [[T1]], -; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i64> [[T1]], splat (i64 -1) +; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll index 8c61e24a97f1d0..c25b873b14a2ee 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -57,9 +57,9 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <8 x i64> [[T1]], -; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <8 x i64> [[T1]], splat (i64 -1) +; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[X:%.*]], [[T2]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -128,8 +128,8 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> , [[T1]] -; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], +; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T1]] +; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], splat (i64 -1) ; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> , [[NBITS]] ; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[X:%.*]], [[T3]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll index a78246781c7f9d..c82c4428c31fbe 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -51,8 +51,8 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32) ; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll index b79ab790975270..325612f0a6d08a 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -55,9 +55,9 @@ declare void @use8xi64(<8 x i64>) define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32) ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -89,7 +89,7 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -122,7 +122,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> , [[T0]] +; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]] ; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll index 097c5663c129c7..699e11dcf0358d 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll @@ -52,7 +52,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32) ; CHECK-NEXT: [[T3:%.*]] = lshr exact <8 x i64> [[T1]], [[T0]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll index d8af0ca2199763..6163aa8c7ac0cf 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll @@ -52,7 +52,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> ; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32) ; CHECK-NEXT: [[T3:%.*]] = ashr exact <8 x i64> [[T1]], [[T0]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll index a7fd2717e7197c..53ccc50d968a8c 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll @@ -100,10 +100,10 @@ declare void @use3xi32(<3 x i32>) define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_splat( -; CHECK-NEXT: [[T1:%.*]] = shl nuw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add nsw <3 x i32> [[T1]], +; CHECK-NEXT: [[T1:%.*]] = shl nuw <3 x i32> splat (i32 1), [[NBITS:%.*]] +; CHECK-NEXT: [[T2:%.*]] = add nsw <3 x i32> [[T1]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = and <3 x i32> [[T2]], [[X:%.*]] -; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) @@ -129,8 +129,8 @@ define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t4_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t4_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <3 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nuw <3 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add nsw <3 x i32> [[T1]], +; CHECK-NEXT: [[T1:%.*]] = shl nuw <3 x i32> splat (i32 1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add nsw <3 x i32> [[T1]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = and <3 x i32> [[T2]], [[X:%.*]] ; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> , [[NBITS]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll index e3c09813891163..98256ffbe53fb3 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll @@ -100,10 +100,10 @@ declare void @use3xi32(<3 x i32>) define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_splat( -; CHECK-NEXT: [[T1:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = and <3 x i32> [[X:%.*]], [[T2]] -; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> splat (i32 32), [[NBITS]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) @@ -129,8 +129,8 @@ define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t4_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t4_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = add <3 x i32> [[NBITS:%.*]], -; CHECK-NEXT: [[T1:%.*]] = shl nsw <3 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], splat (i32 -1) ; CHECK-NEXT: [[T3:%.*]] = and <3 x i32> [[X:%.*]], [[T2]] ; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> , [[NBITS]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll index 8428ef67d6b86b..f12e3e460860fb 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll @@ -59,9 +59,9 @@ declare void @use3xi32(<3 x i32>) define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = and <3 x i32> [[T0]], [[X:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) @@ -80,7 +80,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = and <3 x i32> [[T0]], [[X:%.*]] ; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll index 5d8ff9e9fb71bd..731de2b94cf269 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll @@ -67,10 +67,10 @@ declare void @use3xi32(<3 x i32>) define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] -; CHECK-NEXT: [[T3:%.*]] = add <3 x i32> [[NBITS]], +; CHECK-NEXT: [[T3:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) @@ -92,8 +92,8 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] ; CHECK-NEXT: [[T3:%.*]] = add <3 x i32> [[NBITS]], ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) @@ -118,7 +118,7 @@ define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { define <3 x i32> @t4_vec_poison(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t4_vec_poison( ; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> , [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll index 74864e4473b114..1022a15fa04f56 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll @@ -60,7 +60,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = lshr exact <3 x i32> [[T0]], [[NBITS]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll index 68626d5a2eabbe..4c03775ce75347 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll @@ -60,7 +60,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-LABEL: @t2_vec_splat( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> [[X:%.*]], [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = ashr exact <3 x i32> [[T0]], [[NBITS]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], +; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) diff --git a/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll b/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll index 45880ac5efe51e..ca65e07e2d853a 100644 --- a/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll +++ b/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll @@ -39,7 +39,7 @@ define i32 @t1_sshr(i32 %data, i32 %nbits) { define <4 x i32> @t2_vec(<4 x i32> %data, <4 x i32> %nbits) { ; CHECK-LABEL: @t2_vec( -; CHECK-NEXT: [[T0:%.*]] = shl nsw <4 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[T0:%.*]] = shl nsw <4 x i32> splat (i32 -1), [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = and <4 x i32> [[T0]], [[DATA:%.*]] ; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> [[T1]], [[NBITS]] ; CHECK-NEXT: ret <4 x i32> [[T2]] diff --git a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll index 45db2cf6758524..e7d6cc7102c713 100644 --- a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll +++ b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll @@ -98,7 +98,7 @@ define i8 @urem_XY_XZ_with_CY_lt_CZ_with_shl(i8 %X) { define <2 x i8> @urem_XY_XZ_with_CY_lt_CZ_with_nsw_out(<2 x i8> %X) { ; CHECK-LABEL: @urem_XY_XZ_with_CY_lt_CZ_with_nsw_out( -; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> [[X:%.*]], splat (i8 2) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = shl nsw <2 x i8> %X, @@ -284,7 +284,7 @@ define i8 @srem_XY_XZ_with_CY_rem_CZ_eq_0_fail_missing_flag(i8 %X) { define <2 x i8> @srem_XY_XZ_with_CY_lt_CZ(<2 x i8> %X) { ; CHECK-LABEL: @srem_XY_XZ_with_CY_lt_CZ( -; CHECK-NEXT: [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = shl <2 x i8> %X, @@ -306,7 +306,7 @@ define i8 @srem_XY_XZ_with_CY_lt_CZ_with_nuw_out(i8 %X) { define <2 x i8> @srem_XY_XZ_with_CY_lt_CZ_with_nuw_out_with_shl(<2 x i8> %X) { ; CHECK-LABEL: @srem_XY_XZ_with_CY_lt_CZ_with_nuw_out_with_shl( -; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw nsw <2 x i8> splat (i8 3), [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = shl nuw <2 x i8> , %X @@ -363,7 +363,7 @@ define i8 @srem_XY_XZ_with_CY_gt_CZ_with_nuw_out(i8 %X) { define <2 x i8> @srem_XY_XZ_with_CY_gt_CZ_no_nuw_out(<2 x i8> %X) { ; CHECK-LABEL: @srem_XY_XZ_with_CY_gt_CZ_no_nuw_out( -; CHECK-NEXT: [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %BO0 = mul nsw <2 x i8> %X, diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll index 4f7687aeaf8bc8..c1d8bb2b02b80e 100644 --- a/llvm/test/Transforms/InstCombine/rem.ll +++ b/llvm/test/Transforms/InstCombine/rem.ll @@ -79,7 +79,7 @@ define i8 @urem_with_sext_bool_divisor(i1 %x, i8 %y) { define <2 x i8> @urem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i8> %y) { ; CHECK-LABEL: @urem_with_sext_bool_divisor_vec( ; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze <2 x i8> [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y_FROZEN]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y_FROZEN]], splat (i8 -1) ; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> zeroinitializer, <2 x i8> [[Y_FROZEN]] ; CHECK-NEXT: ret <2 x i8> [[REM]] ; @@ -91,8 +91,8 @@ define <2 x i8> @urem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i8> %y) { define <2 x i4> @big_divisor_vec(<2 x i4> %x) { ; CHECK-LABEL: @big_divisor_vec( ; CHECK-NEXT: [[X_FR:%.*]] = freeze <2 x i4> [[X:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i4> [[X_FR]], -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i4> [[X_FR]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i4> [[X_FR]], splat (i4 -3) +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i4> [[X_FR]], splat (i4 3) ; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i4> [[X_FR]], <2 x i4> [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[REM]] ; @@ -198,7 +198,7 @@ define i32 @test3(i32 %A) { define <2 x i32> @vec_power_of_2_constant_splat_divisor(<2 x i32> %A) { ; CHECK-LABEL: @vec_power_of_2_constant_splat_divisor( -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %B = urem <2 x i32> %A, @@ -207,7 +207,7 @@ define <2 x i32> @vec_power_of_2_constant_splat_divisor(<2 x i32> %A) { define <2 x i19> @weird_vec_power_of_2_constant_splat_divisor(<2 x i19> %A) { ; CHECK-LABEL: @weird_vec_power_of_2_constant_splat_divisor( -; CHECK-NEXT: [[B:%.*]] = and <2 x i19> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i19> [[A:%.*]], splat (i19 7) ; CHECK-NEXT: ret <2 x i19> [[B]] ; %B = urem <2 x i19> %A, @@ -227,7 +227,7 @@ define i1 @test3a(i32 %A) { define <2 x i1> @test3a_vec(<2 x i32> %A) { ; CHECK-LABEL: @test3a_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 7) ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -669,8 +669,8 @@ define i32 @test22(i32 %A) { define <2 x i32> @test23(<2 x i32> %A) { ; CHECK-LABEL: @test23( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[MUL:%.*]] = urem <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) +; CHECK-NEXT: [[MUL:%.*]] = urem <2 x i32> [[AND]], splat (i32 2147483647) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %and = and <2 x i32> %A, @@ -691,7 +691,7 @@ define i1 @test24(i32 %A) { define <2 x i1> @test24_vec(<2 x i32> %A) { ; CHECK-LABEL: @test24_vec( -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[B]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -713,7 +713,7 @@ define i1 @test25(i32 %A) { define <2 x i1> @test25_vec(<2 x i32> %A) { ; CHECK-LABEL: @test25_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -1034,7 +1034,7 @@ define i32 @urem_select_of_constants_divisor(i1 %b, i32 %x) { define <2 x i32> @PR62401(<2 x i1> %x, <2 x i32> %y) { ; CHECK-LABEL: @PR62401( ; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze <2 x i32> [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[Y_FROZEN]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[Y_FROZEN]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> zeroinitializer, <2 x i32> [[Y_FROZEN]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll b/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll index 301ead708a08f7..d12cebe292e391 100644 --- a/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll +++ b/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll @@ -63,8 +63,8 @@ define i32 @p3_sgt(i32 %x, i32 %y) { define <2 x i32> @p4_vec_splat_ult_65536(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p4_vec_splat_ult_65536( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp ult <2 x i32> %x, @@ -73,8 +73,8 @@ define <2 x i32> @p4_vec_splat_ult_65536(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p5_vec_splat_ugt(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p5_vec_splat_ugt( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ult <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp ugt <2 x i32> %x, @@ -83,8 +83,8 @@ define <2 x i32> @p5_vec_splat_ugt(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p6_vec_splat_slt_65536(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p6_vec_splat_slt_65536( -; CHECK-NEXT: [[T_INV:%.*]] = icmp sgt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp slt <2 x i32> %x, @@ -93,8 +93,8 @@ define <2 x i32> @p6_vec_splat_slt_65536(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p7_vec_splat_sgt(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p7_vec_splat_sgt( -; CHECK-NEXT: [[T_INV:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp sgt <2 x i32> %x, @@ -106,8 +106,8 @@ define <2 x i32> @p7_vec_splat_sgt(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @p8_vec_nonsplat_poison0(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p8_vec_nonsplat_poison0( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp ult <2 x i32> %x, @@ -116,7 +116,7 @@ define <2 x i32> @p8_vec_nonsplat_poison0(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p9_vec_nonsplat_poison1(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p9_vec_nonsplat_poison1( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -126,7 +126,7 @@ define <2 x i32> @p9_vec_nonsplat_poison1(<2 x i32> %x, <2 x i32> %y) { } define <2 x i32> @p10_vec_nonsplat_poison2(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @p10_vec_nonsplat_poison2( -; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> , <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -349,9 +349,9 @@ define i32 @ult_inf_loop(i32 %x) { define <2 x i32> @ult_inf_loop_vec(<2 x i32> %x) { ; CHECK-LABEL: @ult_inf_loop_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 38) +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -4) +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 -5), <2 x i32> splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %add = add <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll index 35d15ea0bea238..ea7c471594da0a 100644 --- a/llvm/test/Transforms/InstCombine/rotate.ll +++ b/llvm/test/Transforms/InstCombine/rotate.ll @@ -56,7 +56,7 @@ define i88 @rotl_i88_constant_commute(i88 %x) { define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) { ; CHECK-LABEL: @rotl_v2i16_constant_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -67,7 +67,7 @@ define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) { define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) { ; CHECK-LABEL: @rotl_v2i16_constant_splat_poison0( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -78,7 +78,7 @@ define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) { define <2 x i16> @rotl_v2i16_constant_splat_poison1(<2 x i16> %x) { ; CHECK-LABEL: @rotl_v2i16_constant_splat_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1)) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, @@ -91,7 +91,7 @@ define <2 x i16> @rotl_v2i16_constant_splat_poison1(<2 x i16> %x) { define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) { ; CHECK-LABEL: @rotr_v2i17_constant_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shl = shl <2 x i17> %x, @@ -102,7 +102,7 @@ define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) { define <2 x i17> @rotr_v2i17_constant_splat_poison0(<2 x i17> %x) { ; CHECK-LABEL: @rotr_v2i17_constant_splat_poison0( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shl = shl <2 x i17> %x, @@ -113,7 +113,7 @@ define <2 x i17> @rotr_v2i17_constant_splat_poison0(<2 x i17> %x) { define <2 x i17> @rotr_v2i17_constant_splat_poison1(<2 x i17> %x) { ; CHECK-LABEL: @rotr_v2i17_constant_splat_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> splat (i17 12)) ; CHECK-NEXT: ret <2 x i17> [[R]] ; %shl = shl <2 x i17> %x, @@ -238,7 +238,7 @@ define i8 @rotr_i8_commute(i8 %x, i8 %y) { define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @rotl_v4i32( -; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 32), [[Y:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]] ; CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]] ; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]] @@ -255,7 +255,7 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) { define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) { ; CHECK-LABEL: @rotr_v3i42( -; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i42> , [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i42> splat (i42 42), [[Y:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]] ; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]] ; CHECK-NEXT: [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]] diff --git a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll index e4dd2d10637d3e..68a621946374e8 100644 --- a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll +++ b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll @@ -51,7 +51,7 @@ define { i8, i1 } @no_fold_on_constant_add_overflow(i8 %x) { define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_constant( -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 42)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nsw <2 x i32> %x, @@ -62,7 +62,7 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { ; CHECK-LABEL: @no_fold_splat_undef_constant( ; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nsw <2 x i32> %x, @@ -73,7 +73,7 @@ define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @no_fold_splat_not_constant( ; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nsw <2 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll index 1cce297122f8a1..d27e7aa28d62c9 100644 --- a/llvm/test/Transforms/InstCombine/sadd_sat.ll +++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll @@ -405,8 +405,8 @@ define <4 x i32> @sadd_satv4i4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @sadd_satv4i4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> ) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> ) +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 15)) +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> splat (i32 -16)) ; CHECK-NEXT: ret <4 x i32> [[SPEC_STORE_SELECT8]] ; entry: @@ -422,8 +422,8 @@ define <4 x i32> @ssub_satv4i4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @ssub_satv4i4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> ) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> ) +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 15)) +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> splat (i32 -16)) ; CHECK-NEXT: ret <4 x i32> [[SPEC_STORE_SELECT8]] ; entry: @@ -785,8 +785,8 @@ define <2 x i8> @ashrv2i8(<2 x i16> %a, <2 x i8> %b) { ; CHECK-NEXT: [[CONV:%.*]] = ashr <2 x i16> [[A:%.*]], ; CHECK-NEXT: [[CONV1:%.*]] = sext <2 x i8> [[B:%.*]] to <2 x i16> ; CHECK-NEXT: [[ADD:%.*]] = add <2 x i16> [[CONV]], [[CONV1]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ADD]], <2 x i16> ) -; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> ) +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ADD]], <2 x i16> splat (i16 -128)) +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> splat (i16 127)) ; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw <2 x i16> [[SPEC_STORE_SELECT8]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[CONV7]] ; @@ -805,7 +805,7 @@ entry: define <2 x i8> @ashrv2i8_s(<2 x i16> %a, <2 x i8> %b) { ; CHECK-LABEL: @ashrv2i8_s( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 8) ; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw <2 x i16> [[TMP0]] to <2 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP1]], <2 x i8> [[B:%.*]]) ; CHECK-NEXT: ret <2 x i8> [[TMP2]] diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index af8a9314a08049..9236d96f59a55b 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -61,7 +61,7 @@ define i8 @test_scalar_uadd_combine(i8 %a) { define <2 x i8> @test_vector_uadd_combine(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_combine( -; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30)) ; CHECK-NEXT: ret <2 x i8> [[X2]] ; %x1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -93,7 +93,7 @@ define i8 @test_scalar_uadd_overflow(i8 %a) { define <2 x i8> @test_vector_uadd_overflow(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_overflow( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) %y2 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %y1, <2 x i8> ) @@ -113,7 +113,7 @@ define i8 @test_scalar_sadd_both_positive(i8 %a) { define <2 x i8> @test_vector_sadd_both_positive(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_both_positive( -; CHECK-NEXT: [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30)) ; CHECK-NEXT: ret <2 x i8> [[Z2]] ; %z1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -133,7 +133,7 @@ define i8 @test_scalar_sadd_both_negative(i8 %a) { define <2 x i8> @test_vector_sadd_both_negative(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_both_negative( -; CHECK-NEXT: [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -30)) ; CHECK-NEXT: ret <2 x i8> [[U2]] ; %u1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -177,7 +177,7 @@ define i8 @test_scalar_uadd_neg_neg(i8 %a) { define <2 x i8> @test_vector_uadd_neg_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_neg_neg( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %a_neg = or <2 x i8> %a, %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> ) @@ -198,7 +198,7 @@ define i8 @test_scalar_uadd_nneg_nneg(i8 %a) { define <2 x i8> @test_vector_uadd_nneg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_nneg_nneg( -; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i8> [[A_NNEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -221,7 +221,7 @@ define i8 @test_scalar_uadd_neg_nneg(i8 %a) { define <2 x i8> @test_vector_uadd_neg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_neg_nneg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -243,8 +243,8 @@ define i8 @test_scalar_uadd_never_overflows(i8 %a) { define <2 x i8> @test_vector_uadd_never_overflows(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_never_overflows( -; CHECK-NEXT: [[A_MASKED:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[A_MASKED]], +; CHECK-NEXT: [[A_MASKED:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 -127) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[A_MASKED]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a_masked = and <2 x i8> %a, @@ -263,7 +263,7 @@ define i8 @test_scalar_uadd_always_overflows(i8 %a) { define <2 x i8> @test_vector_uadd_always_overflows(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_uadd_always_overflows( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %a_masked = or <2 x i8> %a, %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_masked, <2 x i8> ) @@ -284,7 +284,7 @@ define i8 @test_scalar_sadd_neg_nneg(i8 %a) { define <2 x i8> @test_vector_sadd_neg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_neg_nneg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -307,7 +307,7 @@ define i8 @test_scalar_sadd_nneg_neg(i8 %a) { define <2 x i8> @test_vector_sadd_nneg_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_nneg_neg( -; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NNEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -330,7 +330,7 @@ define i8 @test_scalar_sadd_neg_neg(i8 %a) { define <2 x i8> @test_vector_sadd_neg_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_neg_neg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -474,7 +474,7 @@ define i8 @test_scalar_ssub_canonical(i8 %a) { define <2 x i8> @test_vector_ssub_canonical(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_canonical( -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -10)) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -550,7 +550,7 @@ define i8 @test_simplify_decrement_ne(i8 %a) { define <2 x i8> @test_simplify_decrement_vec(<2 x i8> %a) { ; CHECK-LABEL: @test_simplify_decrement_vec( -; CHECK-NEXT: [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 1)) ; CHECK-NEXT: ret <2 x i8> [[I2]] ; %i = icmp eq <2 x i8> %a, @@ -561,7 +561,7 @@ define <2 x i8> @test_simplify_decrement_vec(<2 x i8> %a) { define <2 x i8> @test_simplify_decrement_vec_poison(<2 x i8> %a) { ; CHECK-LABEL: @test_simplify_decrement_vec_poison( -; CHECK-NEXT: [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 1)) ; CHECK-NEXT: ret <2 x i8> [[I2]] ; %i = icmp eq <2 x i8> %a, @@ -636,7 +636,7 @@ define i8 @test_invalid_simplify_other(i8 %a, i8 %b) { define <2 x i8> @test_vector_usub_combine(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_combine( -; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30)) ; CHECK-NEXT: ret <2 x i8> [[X2]] ; %x1 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -688,7 +688,7 @@ define i8 @test_scalar_ssub_both_positive(i8 %a) { define <2 x i8> @test_vector_ssub_both_positive(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_both_positive( -; CHECK-NEXT: [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -30)) ; CHECK-NEXT: ret <2 x i8> [[Z2]] ; %z1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -708,7 +708,7 @@ define i8 @test_scalar_ssub_both_negative(i8 %a) { define <2 x i8> @test_vector_ssub_both_negative(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_both_negative( -; CHECK-NEXT: [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30)) ; CHECK-NEXT: ret <2 x i8> [[U2]] ; %u1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -741,7 +741,7 @@ define i8 @test_scalar_sadd_ssub(i8 %a) { define <2 x i8> @test_vector_sadd_ssub(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_sadd_ssub( -; CHECK-NEXT: [[V2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[V2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -30)) ; CHECK-NEXT: ret <2 x i8> [[V2]] ; %v1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> , <2 x i8> %a) @@ -794,7 +794,7 @@ define i8 @test_scalar_usub_neg_nneg(i8 %a) { define <2 x i8> @test_vector_usub_neg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_neg_nneg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A_NEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -817,7 +817,7 @@ define i8 @test_scalar_usub_nneg_nneg(i8 %a) { define <2 x i8> @test_vector_usub_nneg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_nneg_nneg( -; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A_NNEG]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -839,8 +839,8 @@ define i8 @test_scalar_usub_never_overflows(i8 %a) { define <2 x i8> @test_vector_usub_never_overflows(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_never_overflows( -; CHECK-NEXT: [[A_MASKED:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_MASKED]], +; CHECK-NEXT: [[A_MASKED:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 64) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_MASKED]], splat (i8 -10) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a_masked = or <2 x i8> %a, @@ -880,7 +880,7 @@ define i8 @test_scalar_ssub_neg_neg(i8 %a) { define <2 x i8> @test_vector_ssub_neg_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_neg_neg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -903,7 +903,7 @@ define i8 @test_scalar_ssub_nneg_nneg(i8 %a) { define <2 x i8> @test_vector_ssub_nneg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_nneg_nneg( -; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NNEG]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -926,7 +926,7 @@ define i8 @test_scalar_ssub_neg_nneg(i8 %a) { define <2 x i8> @test_vector_ssub_neg_nneg(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_ssub_neg_nneg( -; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1044,7 +1044,7 @@ define i8 @test_scalar_usub_add_nuw_inferred(i8 %a) { define <2 x i8> @test_vector_usub_add_nuw_no_ov(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov( -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], splat (i8 1) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %b = add nuw <2 x i8> %a, @@ -1076,7 +1076,7 @@ define <3 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat1_poison(<3 x i8> %a) { define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat2(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat2( ; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> splat (i8 9)) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %b = add nuw <2 x i8> %a, @@ -1124,8 +1124,8 @@ define i8 @test_scalar_ssub_add_nsw_may_ov(i8 %a, i8 %b) { define <2 x i8> @test_vector_ssub_add_nsw_no_ov_splat(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_splat( -; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], splat (i8 7) +; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7) ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1137,7 +1137,7 @@ define <2 x i8> @test_vector_ssub_add_nsw_no_ov_splat(<2 x i8> %a, <2 x i8> %b) define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat1(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat1( -; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], splat (i8 7) ; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]] ; CHECK-NEXT: ret <2 x i8> [[R]] @@ -1151,7 +1151,7 @@ define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat1(<2 x i8> %a, <2 x i8> define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat2(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat2( ; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], +; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7) ; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[AA]], <2 x i8> [[BB]]) ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1459,7 +1459,7 @@ define i32 @uadd_sat_negative_one(i32 %x) { define <2 x i8> @uadd_sat_flipped4_vector(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_vector( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 9)) ; CHECK-NEXT: ret <2 x i8> [[COND]] ; %cmp = icmp ult <2 x i8> %x, @@ -1470,7 +1470,7 @@ define <2 x i8> @uadd_sat_flipped4_vector(<2 x i8> %x) { define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 9)) ; CHECK-NEXT: ret <2 x i8> [[COND]] ; %cmp = icmp ult <2 x i8> %x, @@ -1481,7 +1481,7 @@ define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) { define <2 x i8> @uadd_sat_flipped4_poison_vector_compare(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector_compare( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 9)) ; CHECK-NEXT: ret <2 x i8> [[COND]] ; %cmp = icmp ult <2 x i8> %x, @@ -1492,7 +1492,7 @@ define <2 x i8> @uadd_sat_flipped4_poison_vector_compare(<2 x i8> %x) { define <2 x i8> @uadd_sat_flipped4_poison_vector_compare2(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector_compare2( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %cmp = icmp ult <2 x i8> %x, %add = add <2 x i8> %x, @@ -1878,7 +1878,7 @@ define i32 @uadd_sat_not_uge(i32 %x, i32 %y) { define <2 x i32> @uadd_sat_not_ugt_commute_add(<2 x i32> %x, <2 x i32> %yp) { ; CHECK-LABEL: @uadd_sat_not_ugt_commute_add( ; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], -; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]]) ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -1895,7 +1895,7 @@ define <2 x i32> @uadd_sat_not_ugt_commute_add_partial_poison(<2 x i32> %x, <2 x ; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i32> [[YP:%.*]], [[NOTX]] ; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[YP]], [[X]] -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> , <2 x i32> [[A]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> splat (i32 -1), <2 x i32> [[A]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %notx = xor <2 x i32> %x, @@ -1950,7 +1950,7 @@ define <2 x i32> @uadd_sat_not_commute_select_ugt(<2 x i32> %xp, <2 x i32> %yp) ; CHECK-LABEL: @uadd_sat_not_commute_select_ugt( ; CHECK-NEXT: [[X:%.*]] = urem <2 x i32> , [[XP:%.*]] ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i32> , [[YP:%.*]] -; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X]], +; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]]) ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -2057,7 +2057,7 @@ define i32 @uadd_sat_canon_y_nuw(i32 %x, i32 %y) { define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec( -; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 42)) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = add <4 x i32> %x, @@ -2068,7 +2068,7 @@ define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) { define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec_commute( -; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 42)) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = add <4 x i32> %x, @@ -2079,7 +2079,7 @@ define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) { define <4 x i32> @uadd_sat_constant_vec_commute_undefs(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec_commute_undefs( -; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 42)) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = add <4 x i32> %x, @@ -2198,7 +2198,7 @@ define i32 @unsigned_sat_constant_using_min(i32 %x) { define <2 x i32> @unsigned_sat_constant_using_min_splat(<2 x i32> %x) { ; CHECK-LABEL: @unsigned_sat_constant_using_min_splat( -; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 -15)) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %c = icmp ult <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll index 591437b72c1fc4..a6931b4c41d2d7 100644 --- a/llvm/test/Transforms/InstCombine/scalarization.ll +++ b/llvm/test/Transforms/InstCombine/scalarization.ll @@ -161,7 +161,7 @@ define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) { define i8 @extract_element_binop_splat_variable_index_may_trap(<4 x i8> %x, <4 x i8> %y, i32 %z) { ; ; CHECK-LABEL: @extract_element_binop_splat_variable_index_may_trap( -; CHECK-NEXT: [[B:%.*]] = sdiv <4 x i8> , [[Y:%.*]] +; CHECK-NEXT: [[B:%.*]] = sdiv <4 x i8> splat (i8 42), [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Z:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll index 3184395ccb12b8..e3e4a0c4a8f9cb 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll @@ -107,8 +107,8 @@ define i32 @sdiv_abs_nsw(i32 %x) { define <4 x i32> @sdiv_abs_nsw_vec(<4 x i32> %x) { ; CHECK-LABEL: @sdiv_abs_nsw_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 1), <4 x i32> splat (i32 -1) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll index 3451c5e626f308..a3133ef82d5525 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll @@ -27,7 +27,7 @@ define i8 @n1(i8 %x) { define <2 x i8> @t2_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @t2_vec_splat( -; CHECK-NEXT: [[DIV_NEG:%.*]] = ashr exact <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[DIV_NEG:%.*]] = ashr exact <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i8> zeroinitializer, [[DIV_NEG]] ; CHECK-NEXT: ret <2 x i8> [[DIV]] ; @@ -100,8 +100,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) { define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5) +; CHECK-NEXT: [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], splat (i8 4) ; CHECK-NEXT: [[D:%.*]] = sub nsw <2 x i8> zeroinitializer, [[D_NEG]] ; CHECK-NEXT: ret <2 x i8> [[D]] ; diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll index 36af8685f9ae39..f463f27acc882f 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll @@ -37,7 +37,7 @@ define i8 @n2(i8 %x) { define <2 x i8> @t3_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @t3_vec_splat( -; CHECK-NEXT: [[DIV:%.*]] = ashr exact <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[DIV:%.*]] = ashr exact <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: ret <2 x i8> [[DIV]] ; %div = sdiv exact <2 x i8> %x, @@ -144,8 +144,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) { define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i8> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3) +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i8> [[A]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[D]] ; %a = shl <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/select-and-or.ll b/llvm/test/Transforms/InstCombine/select-and-or.ll index 68bd28cf234b47..c3fbb3fe15e19e 100644 --- a/llvm/test/Transforms/InstCombine/select-and-or.ll +++ b/llvm/test/Transforms/InstCombine/select-and-or.ll @@ -539,7 +539,7 @@ define i1 @and_or2_multiuse(i1 %a, i1 %b, i1 %c) { define <2 x i1> @and_or1_vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_or1_vec( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -553,7 +553,7 @@ define <2 x i1> @and_or1_vec(<2 x i1> %a, <2 x i1> %b) { define <2 x i1> @and_or2_vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_or2_vec( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> , <2 x i1> [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> splat (i1 true), <2 x i1> [[A:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -567,7 +567,7 @@ define <2 x i1> @and_or2_vec(<2 x i1> %a, <2 x i1> %b) { define <2 x i1> @and_or1_vec_commuted(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_or1_vec_commuted( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -581,7 +581,7 @@ define <2 x i1> @and_or1_vec_commuted(<2 x i1> %a, <2 x i1> %b) { define <2 x i1> @and_or2_vec_commuted(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @and_or2_vec_commuted( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> , <2 x i1> [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> splat (i1 true), <2 x i1> [[A:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -673,7 +673,7 @@ define i1 @and_or3_multiuse(i1 %a, i1 %b, i32 %x, i32 %y) { define <2 x i1> @and_or3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @and_or3_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> , <2 x i1> [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> splat (i1 true), <2 x i1> [[A:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -686,7 +686,7 @@ define <2 x i1> @and_or3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> % define <2 x i1> @and_or3_vec_commuted(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @and_or3_vec_commuted( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> , <2 x i1> [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> splat (i1 true), <2 x i1> [[A:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -791,7 +791,7 @@ define <2 x i1> @or_and1_vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_and1_vec( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[A:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> , <2 x i1> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = call <2 x i1> @gen_v2i1() @@ -805,7 +805,7 @@ define <2 x i1> @or_and2_vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_and2_vec( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = call <2 x i1> @gen_v2i1() @@ -819,7 +819,7 @@ define <2 x i1> @or_and1_vec_commuted(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_and1_vec_commuted( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[A:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> , <2 x i1> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = call <2 x i1> @gen_v2i1() @@ -833,7 +833,7 @@ define <2 x i1> @or_and2_vec_commuted(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_and2_vec_commuted( ; CHECK-NEXT: [[C:%.*]] = call <2 x i1> @gen_v2i1() ; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = call <2 x i1> @gen_v2i1() @@ -938,7 +938,7 @@ define <2 x i1> @or_and3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> % ; CHECK-LABEL: @or_and3_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = icmp eq <2 x i32> %x, %y @@ -951,7 +951,7 @@ define <2 x i1> @or_and3_vec_commuted(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 ; CHECK-LABEL: @or_and3_vec_commuted( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[TMP2]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[TMP2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %c = icmp eq <2 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/select-bitext.ll b/llvm/test/Transforms/InstCombine/select-bitext.ll index 4c09448aa83391..553d5ae155153f 100644 --- a/llvm/test/Transforms/InstCombine/select-bitext.ll +++ b/llvm/test/Transforms/InstCombine/select-bitext.ll @@ -51,7 +51,7 @@ define i64 @sel_sext(i32 %a, i1 %cmp) { define <4 x i64> @sel_sext_vec(<4 x i32> %a, <4 x i1> %cmp) { ; CHECK-LABEL: @sel_sext_vec( ; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i32> [[A:%.*]] to <4 x i64> -; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> +; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> splat (i64 42) ; CHECK-NEXT: ret <4 x i64> [[EXT]] ; %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> @@ -73,7 +73,7 @@ define i64 @sel_zext(i32 %a, i1 %cmp) { define <4 x i64> @sel_zext_vec(<4 x i32> %a, <4 x i1> %cmp) { ; CHECK-LABEL: @sel_zext_vec( ; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[A:%.*]] to <4 x i64> -; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> +; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> splat (i64 42) ; CHECK-NEXT: ret <4 x i64> [[EXT]] ; %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> @@ -152,8 +152,8 @@ define i32 @trunc_sel_equal_sext(i32 %a, i1 %cmp) { define <2 x i32> @trunc_sel_equal_sext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_equal_sext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr exact <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 16) +; CHECK-NEXT: [[TMP2:%.*]] = ashr exact <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; @@ -178,7 +178,7 @@ define i64 @trunc_sel_larger_zext(i32 %a, i1 %cmp) { define <2 x i64> @trunc_sel_larger_zext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_larger_zext_vec( -; CHECK-NEXT: [[TRUNC_MASK:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TRUNC_MASK:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 65535) ; CHECK-NEXT: [[TMP1:%.*]] = zext nneg <2 x i32> [[TRUNC_MASK]] to <2 x i64> ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP1]], <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[EXT]] @@ -205,7 +205,7 @@ define i32 @trunc_sel_smaller_zext(i64 %a, i1 %cmp) { define <2 x i32> @trunc_sel_smaller_zext_vec(<2 x i64> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_smaller_zext_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 65535) ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; @@ -229,7 +229,7 @@ define i32 @trunc_sel_equal_zext(i32 %a, i1 %cmp) { define <2 x i32> @trunc_sel_equal_zext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_equal_zext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 65535) ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; @@ -578,7 +578,7 @@ define i32 @sext_true_val_must_be_all_ones(i1 %x) { define <2 x i32> @sext_true_val_must_be_all_ones_vec(<2 x i1> %x) { ; CHECK-LABEL: @sext_true_val_must_be_all_ones_vec( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> , <2 x i32> , !prof [[PROF0]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> splat (i32 -1), <2 x i32> , !prof [[PROF0]] ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %ext = sext <2 x i1> %x to <2 x i32> @@ -598,7 +598,7 @@ define i32 @zext_true_val_must_be_one(i1 %x) { define <2 x i32> @zext_true_val_must_be_one_vec(<2 x i1> %x) { ; CHECK-LABEL: @zext_true_val_must_be_one_vec( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> , <2 x i32> , !prof [[PROF0]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> splat (i32 1), <2 x i32> , !prof [[PROF0]] ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %ext = zext <2 x i1> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/select-divrem.ll b/llvm/test/Transforms/InstCombine/select-divrem.ll index e11afd7b543b20..a674f9c64b2001 100644 --- a/llvm/test/Transforms/InstCombine/select-divrem.ll +++ b/llvm/test/Transforms/InstCombine/select-divrem.ll @@ -285,7 +285,7 @@ define i32 @rem_euclid_wrong_operands_select(i32 %0) { define <2 x i32> @rem_euclid_vec(<2 x i32> %0) { ; CHECK-LABEL: @rem_euclid_vec( -; CHECK-NEXT: [[SEL:%.*]] = and <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[SEL:%.*]] = and <2 x i32> [[TMP0:%.*]], splat (i32 7) ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %rem = srem <2 x i32> %0, diff --git a/llvm/test/Transforms/InstCombine/select-extractelement.ll b/llvm/test/Transforms/InstCombine/select-extractelement.ll index e5b4fe5051e103..621d278dee6e5f 100644 --- a/llvm/test/Transforms/InstCombine/select-extractelement.ll +++ b/llvm/test/Transforms/InstCombine/select-extractelement.ll @@ -215,7 +215,7 @@ define <4 x i32> @extract_cond_type_mismatch(<4 x i32> %x, <4 x i32> %y, <5 x i1 define i32 @inf_loop_partial_undef(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @inf_loop_partial_undef( -; CHECK-NEXT: [[T5:%.*]] = add nsw <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[T5:%.*]] = add nsw <2 x i32> [[Y:%.*]], splat (i32 2147483647) ; CHECK-NEXT: [[T6:%.*]] = icmp sge <2 x i32> [[T5]], [[X:%.*]] ; CHECK-NEXT: [[AB:%.*]] = and <2 x i1> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[T7:%.*]] = select <2 x i1> [[AB]], <2 x i1> [[T6]], <2 x i1> diff --git a/llvm/test/Transforms/InstCombine/select-factorize.ll b/llvm/test/Transforms/InstCombine/select-factorize.ll index ab9d9f6b24754d..dc0e3b54cca578 100644 --- a/llvm/test/Transforms/InstCombine/select-factorize.ll +++ b/llvm/test/Transforms/InstCombine/select-factorize.ll @@ -101,7 +101,7 @@ define i1 @logic_and_logic_or_8(i1 %c, i1 %a, i1 %b) { define <3 x i1> @logic_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_and_logic_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -113,7 +113,7 @@ define <3 x i1> @logic_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b define <3 x i1> @logic_and_logic_or_vector_poison1(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_and_logic_or_vector_poison1( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -127,7 +127,7 @@ define <3 x i1> @logic_and_logic_or_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 ; CHECK-LABEL: @logic_and_logic_or_vector_poison2( ; CHECK-NEXT: [[AC:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[A:%.*]], <3 x i1> ; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[C]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[AC]], <3 x i1> , <3 x i1> [[BC]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[AC]], <3 x i1> splat (i1 true), <3 x i1> [[BC]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = select <3 x i1> %c, <3 x i1> %a, <3 x i1> @@ -138,7 +138,7 @@ define <3 x i1> @logic_and_logic_or_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 define <3 x i1> @logic_and_logic_or_vector_poison3(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_and_logic_or_vector_poison3( -; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[BC]], <3 x i1> ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -265,7 +265,7 @@ define i1 @and_logic_and_logic_or_8(i1 %c, i1 %a, i1 %b) { define <3 x i1> @and_logic_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_logic_and_logic_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -277,7 +277,7 @@ define <3 x i1> @and_logic_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1 define <3 x i1> @and_logic_and_logic_or_vector_poison1(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_logic_and_logic_or_vector_poison1( -; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[BC]], <3 x i1> ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -289,7 +289,7 @@ define <3 x i1> @and_logic_and_logic_or_vector_poison1(<3 x i1> %c, <3 x i1> %a, define <3 x i1> @and_logic_and_logic_or_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_logic_and_logic_or_vector_poison2( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -342,7 +342,7 @@ define i1 @and_and_logic_or_2(i1 %c, i1 %a, i1 %b) { define <3 x i1> @and_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_and_logic_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = and <3 x i1> [[C:%.*]], [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -354,7 +354,7 @@ define <3 x i1> @and_and_logic_or_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) define <3 x i1> @and_and_logic_or_vector_poison(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @and_and_logic_or_vector_poison( -; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = and <3 x i1> [[C:%.*]], [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -480,7 +480,7 @@ define i1 @logic_or_logic_and_8(i1 %c, i1 %a, i1 %b) { define <3 x i1> @logic_or_logic_and_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_or_logic_and_vector( ; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[TMP1]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = select <3 x i1> %c, <3 x i1> , <3 x i1> %a @@ -492,7 +492,7 @@ define <3 x i1> @logic_or_logic_and_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b define <3 x i1> @logic_or_logic_and_vector_poison1(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_or_logic_and_vector_poison1( ; CHECK-NEXT: [[AC:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[A:%.*]] -; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[C]], <3 x i1> , <3 x i1> [[B:%.*]] +; CHECK-NEXT: [[BC:%.*]] = select <3 x i1> [[C]], <3 x i1> splat (i1 true), <3 x i1> [[B:%.*]] ; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[AC]], <3 x i1> [[BC]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[OR]] ; @@ -517,7 +517,7 @@ define <3 x i1> @logic_or_logic_and_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 define <3 x i1> @logic_or_logic_and_vector_poison3(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @logic_or_logic_and_vector_poison3( ; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[TMP1]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = select <3 x i1> %c, <3 x i1> , <3 x i1> %a @@ -644,7 +644,7 @@ define i1 @or_logic_or_logic_and_8(i1 %c, i1 %a, i1 %b) { define <3 x i1> @or_logic_or_logic_and_vector(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @or_logic_or_logic_and_vector( ; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[TMP1]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = or <3 x i1> %c, %a @@ -668,7 +668,7 @@ define <3 x i1> @or_logic_or_logic_and_vector_poison1(<3 x i1> %c, <3 x i1> %a, define <3 x i1> @or_logic_or_logic_and_vector_poison2(<3 x i1> %c, <3 x i1> %a, <3 x i1> %b) { ; CHECK-LABEL: @or_logic_or_logic_and_vector_poison2( ; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> [[A:%.*]], <3 x i1> [[B:%.*]], <3 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> , <3 x i1> [[TMP1]] +; CHECK-NEXT: [[OR:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> splat (i1 true), <3 x i1> [[TMP1]] ; CHECK-NEXT: ret <3 x i1> [[OR]] ; %ac = or <3 x i1> %c, %a diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll b/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll index fe794a33ece2d2..d62edf2fe39330 100644 --- a/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll +++ b/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll @@ -17,7 +17,7 @@ define i32 @test_eq(i32 %x) { define <2 x i32> @test_eq_vect(<2 x i32> %x) { ; CHECK-LABEL: @test_eq_vect( -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %shl.mask = and <2 x i32> %x, @@ -41,7 +41,7 @@ define i32 @test_ne(i32 %x) { define <2 x i32> @test_ne_vect(<2 x i32> %x) { ; CHECK-LABEL: @test_ne_vect( -; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %shl.mask = and <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and.ll b/llvm/test/Transforms/InstCombine/select-icmp-and.ll index a57a7c5e32e494..219a66c314a07d 100644 --- a/llvm/test/Transforms/InstCombine/select-icmp-and.ll +++ b/llvm/test/Transforms/InstCombine/select-icmp-and.ll @@ -40,8 +40,8 @@ define i32 @test35(i32 %x) { define <2 x i32> @test35vec(<2 x i32> %x) { ; CHECK-LABEL: @test35vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 60), <2 x i32> splat (i32 100) ; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp sge <2 x i32> %x, @@ -77,7 +77,7 @@ define i32 @test36(i32 %x) { define <2 x i32> @test36vec(<2 x i32> %x) { ; CHECK-LABEL: @test36vec( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 60), <2 x i32> splat (i32 100) ; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp slt <2 x i32> %x, @@ -98,8 +98,8 @@ define i32 @test37(i32 %x) { define <2 x i32> @test37vec(<2 x i32> %x) { ; CHECK-LABEL: @test37vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 1), <2 x i32> splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[COND]] ; %cmp = icmp sgt <2 x i32> %x, @@ -122,9 +122,9 @@ define i32 @test65(i64 %x) { define <2 x i32> @test65vec(<2 x i64> %x) { ; CHECK-LABEL: @test65vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], splat (i64 16) ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and <2 x i64> %x, @@ -148,9 +148,9 @@ define i32 @test66(i64 %x) { define <2 x i32> @test66vec(<2 x i64> %x) { ; CHECK-LABEL: @test66vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], splat (i64 4294967296) ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and <2 x i64> %x, @@ -164,7 +164,7 @@ define <2 x i32> @test66vec_scalar_and(i64 %x) { ; CHECK-LABEL: @test66vec_scalar_and( ; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and i64 %x, 4294967296 @@ -188,9 +188,9 @@ define i32 @test67(i16 %x) { define <2 x i32> @test67vec(<2 x i16> %x) { ; CHECK-LABEL: @test67vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 4) ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and <2 x i16> %x, @@ -214,9 +214,9 @@ define i32 @test71(i32 %x) { define <2 x i32> @test71vec(<2 x i32> %x) { ; CHECK-LABEL: @test71vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = and <2 x i32> %x, @@ -240,9 +240,9 @@ define i32 @test72(i32 %x) { define <2 x i32> @test72vec(<2 x i32> %x) { ; CHECK-LABEL: @test72vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 40), <2 x i32> splat (i32 42) ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = and <2 x i32> %x, @@ -266,9 +266,9 @@ define i32 @test73(i32 %x) { define <2 x i32> @test73vec(<2 x i32> %x) { ; CHECK-LABEL: @test73vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 40), <2 x i32> splat (i32 42) ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %1 = trunc <2 x i32> %x to <2 x i8> @@ -290,8 +290,8 @@ define i32 @test74(i32 %x) { define <2 x i32> @test74vec(<2 x i32> %x) { ; CHECK-LABEL: @test74vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> splat (i32 40), <2 x i32> splat (i32 42) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %1 = icmp sgt <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/select-masked_load.ll b/llvm/test/Transforms/InstCombine/select-masked_load.ll index 51525e5ee83467..b6bac612d6f9b2 100644 --- a/llvm/test/Transforms/InstCombine/select-masked_load.ll +++ b/llvm/test/Transforms/InstCombine/select-masked_load.ll @@ -38,7 +38,7 @@ define <4 x i32> @masked_load_and_zero_inactive_3(ptr %ptr, <4 x i1> %mask, <4 x ; Remove redundant select when its mask doesn't overlap with the load mask. define <4 x i32> @masked_load_and_zero_inactive_4(ptr %ptr, <4 x i1> %inv_mask) { ; CHECK-LABEL: @masked_load_and_zero_inactive_4( -; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], +; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[PTR:%.*]], i32 4, <4 x i1> [[MASK]], <4 x i32> zeroinitializer) ; CHECK-NEXT: ret <4 x i32> [[LOAD]] ; @@ -51,7 +51,7 @@ define <4 x i32> @masked_load_and_zero_inactive_4(ptr %ptr, <4 x i1> %inv_mask) ; As above but reuse the load's existing passthrough. define <4 x i32> @masked_load_and_zero_inactive_5(ptr %ptr, <4 x i1> %inv_mask) { ; CHECK-LABEL: @masked_load_and_zero_inactive_5( -; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], +; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[PTR:%.*]], i32 4, <4 x i1> [[MASK]], <4 x i32> zeroinitializer) ; CHECK-NEXT: ret <4 x i32> [[LOAD]] ; @@ -64,7 +64,7 @@ define <4 x i32> @masked_load_and_zero_inactive_5(ptr %ptr, <4 x i1> %inv_mask) ; No transform when the load's passthrough cannot be reused or altered. define <4 x i32> @masked_load_and_zero_inactive_6(ptr %ptr, <4 x i1> %inv_mask, <4 x i32> %passthrough) { ; CHECK-LABEL: @masked_load_and_zero_inactive_6( -; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], +; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[PTR:%.*]], i32 4, <4 x i1> [[MASK]], <4 x i32> [[PASSTHROUGH:%.*]]) ; CHECK-NEXT: [[MASKED:%.*]] = select <4 x i1> [[INV_MASK]], <4 x i32> zeroinitializer, <4 x i32> [[LOAD]] ; CHECK-NEXT: ret <4 x i32> [[MASKED]] @@ -91,7 +91,7 @@ define <4 x i32> @masked_load_and_zero_inactive_7(ptr %ptr, <4 x i1> %mask1, <4 ; load's inactive lanes and thus the load's passthrough takes effect. define <4 x float> @masked_load_and_zero_inactive_8(ptr %ptr, <4 x i1> %inv_mask, <4 x i1> %cond) { ; CHECK-LABEL: @masked_load_and_zero_inactive_8( -; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], +; CHECK-NEXT: [[MASK:%.*]] = xor <4 x i1> [[INV_MASK:%.*]], splat (i1 true) ; CHECK-NEXT: [[PG:%.*]] = and <4 x i1> [[COND:%.*]], [[MASK]] ; CHECK-NEXT: [[LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[PTR:%.*]], i32 4, <4 x i1> [[PG]], <4 x float> zeroinitializer) ; CHECK-NEXT: ret <4 x float> [[LOAD]] diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll index 0c7624018cb02c..4495e7f6ed067a 100644 --- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll +++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll @@ -22,7 +22,7 @@ define i32 @and_lshr_and(i32 %arg) { define <2 x i32> @and_lshr_and_splatvec(<2 x i32> %arg) { ; CHECK-LABEL: @and_lshr_and_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], splat (i32 3) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[T4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T4]] @@ -111,7 +111,7 @@ define i32 @and_and(i32 %arg) { define <2 x i32> @and_and_splatvec(<2 x i32> %arg) { ; CHECK-LABEL: @and_and_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], splat (i32 3) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[T3]] @@ -190,7 +190,7 @@ define i32 @f_var0_commutative_and(i32 %arg, i32 %arg1) { define <2 x i32> @f_var0_splatvec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @f_var0_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[ARG:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[T5:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> @@ -270,7 +270,7 @@ define i32 @f_var1_commutative_and(i32 %arg, i32 %arg1) { define <2 x i32> @f_var1_vec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @f_var1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[ARG:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[T4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32> @@ -285,7 +285,7 @@ define <2 x i32> @f_var1_vec(<2 x i32> %arg, <2 x i32> %arg1) { define <3 x i32> @f_var1_vec_poison(<3 x i32> %arg, <3 x i32> %arg1) { ; CHECK-LABEL: @f_var1_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[ARG:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[T4:%.*]] = zext <3 x i1> [[TMP3]] to <3 x i32> @@ -321,11 +321,11 @@ define i32 @f_var2(i32 %arg, i32 %arg1) { define <2 x i32> @f_var2_splatvec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @f_var2_splatvec( -; CHECK-NEXT: [[T:%.*]] = and <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T:%.*]] = and <2 x i32> [[ARG:%.*]], splat (i32 1) ; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T]], zeroinitializer ; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], -; CHECK-NEXT: [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], splat (i32 1) +; CHECK-NEXT: [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T5]] ; %t = and <2 x i32> %arg, @@ -341,8 +341,8 @@ define <2 x i32> @f_var2_vec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-NEXT: [[T:%.*]] = and <2 x i32> [[ARG:%.*]], ; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T]], zeroinitializer ; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], -; CHECK-NEXT: [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> +; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], splat (i32 1) +; CHECK-NEXT: [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T5]] ; %t = and <2 x i32> %arg, ; mask is not splat @@ -414,8 +414,8 @@ define <2 x i32> @f_var3_splatvec(<2 x i32> %arg, <2 x i32> %arg1, <2 x i32> %ar ; CHECK-NEXT: [[T:%.*]] = and <2 x i32> [[ARG:%.*]], [[ARG1:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp eq <2 x i32> [[T]], zeroinitializer ; CHECK-NEXT: [[T4:%.*]] = lshr <2 x i32> [[ARG]], [[ARG2:%.*]] -; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], -; CHECK-NEXT: [[T6:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[T5]], <2 x i32> +; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], splat (i32 1) +; CHECK-NEXT: [[T6:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[T5]], <2 x i32> splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T6]] ; %t = and <2 x i32> %arg, %arg1 diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index e0306972e48e2c..ebea5bf2eadf44 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -223,9 +223,9 @@ define i1 @andn_or_cmp_2_partial_logical_commute(i16 %a, i16 %b) { define <2 x i1> @not_logical_or(i1 %b, <2 x i32> %a) { ; CHECK-LABEL: @not_logical_or( -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], -; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> , <2 x i1> [[IMPLIED]] +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3) +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], splat (i32 -1) +; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[IMPLIED]] ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[COND]], <2 x i1> [[OR]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[AND]] ; @@ -240,9 +240,9 @@ define <2 x i1> @not_logical_or(i1 %b, <2 x i32> %a) { define <2 x i1> @not_logical_or2(i1 %b, <2 x i32> %a) { ; CHECK-LABEL: @not_logical_or2( -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], -; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> , <2 x i1> [[IMPLIED]] +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3) +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], splat (i32 -1) +; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[IMPLIED]] ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[OR]], <2 x i1> [[COND]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[AND]] ; @@ -764,10 +764,10 @@ define i1 @orn_and_cmp_2_partial_logical_commute(i16 %a, i16 %b) { define <2 x i1> @not_logical_and(i1 %b, <2 x i32> %a) { ; CHECK-LABEL: @not_logical_and( -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3) +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], splat (i32 1) ; CHECK-NEXT: [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[IMPLIED]], <2 x i1> , <2 x i1> [[AND]] +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[IMPLIED]], <2 x i1> splat (i1 true), <2 x i1> [[AND]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %cond = icmp ult <2 x i32> %a, @@ -781,10 +781,10 @@ define <2 x i1> @not_logical_and(i1 %b, <2 x i32> %a) { define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) { ; CHECK-LABEL: @not_logical_and2( -; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3) +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], splat (i32 1) ; CHECK-NEXT: [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[AND]], <2 x i1> , <2 x i1> [[IMPLIED]] +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[AND]], <2 x i1> splat (i1 true), <2 x i1> [[IMPLIED]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %cond = icmp ult <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/select-value-equivalence.ll b/llvm/test/Transforms/InstCombine/select-value-equivalence.ll index 28984282511924..433fafc7e553b8 100644 --- a/llvm/test/Transforms/InstCombine/select-value-equivalence.ll +++ b/llvm/test/Transforms/InstCombine/select-value-equivalence.ll @@ -4,8 +4,8 @@ define <2 x i8> @select_icmp_insertelement_eq(<2 x i8> %x, <2 x i8> %y, i8 %i) { ; CHECK-LABEL: define <2 x i8> @select_icmp_insertelement_eq( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], i8 [[I:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], -; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i8> , i8 0, i8 [[I]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i8> splat (i8 2), i8 0, i8 [[I]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[INSERT]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] ; @@ -18,8 +18,8 @@ define <2 x i8> @select_icmp_insertelement_eq(<2 x i8> %x, <2 x i8> %y, i8 %i) { define <2 x i8> @select_icmp_insertelement_ne(<2 x i8> %x, <2 x i8> %y, i8 %i) { ; CHECK-LABEL: define <2 x i8> @select_icmp_insertelement_ne( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], i8 [[I:%.*]]) { -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <2 x i8> [[Y]], -; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i8> , i8 0, i8 [[I]] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x i8> splat (i8 2), i8 0, i8 [[I]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP_NOT]], <2 x i8> [[INSERT]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] ; @@ -32,8 +32,8 @@ define <2 x i8> @select_icmp_insertelement_ne(<2 x i8> %x, <2 x i8> %y, i8 %i) { define <2 x i8> @select_icmp_shufflevector_identity(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i8> @select_icmp_shufflevector_identity( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], -; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> , <2 x i8> [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) +; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 2), <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] ; %cmp = icmp eq <2 x i8> %y, @@ -45,7 +45,7 @@ define <2 x i8> @select_icmp_shufflevector_identity(<2 x i8> %x, <2 x i8> %y) { define <4 x i8> @select_icmp_shufflevector_select(<4 x i8> %x, <4 x i8> %y, <4 x i8> %z) { ; CHECK-LABEL: define <4 x i8> @select_icmp_shufflevector_select( ; CHECK-SAME: <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i8> [[Z:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i8> [[Y]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i8> [[Z]], <4 x i8> , <4 x i32> ; CHECK-NEXT: [[RETVAL:%.*]] = select <4 x i1> [[CMP]], <4 x i8> [[SHUFFLE]], <4 x i8> [[X]] ; CHECK-NEXT: ret <4 x i8> [[RETVAL]] @@ -59,7 +59,7 @@ define <4 x i8> @select_icmp_shufflevector_select(<4 x i8> %x, <4 x i8> %y, <4 x define <2 x i8> @select_icmp_shufflevector_lanecrossing(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i8> @select_icmp_shufflevector_lanecrossing( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i8> [[Y]], <2 x i8> poison, <2 x i32> ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[SHUFFLE]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] @@ -75,7 +75,7 @@ declare <2 x i8> @fn(<2 x i8>) speculatable define <2 x i8> @select_icmp_call_possibly_lanecrossing(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: define <2 x i8> @select_icmp_call_possibly_lanecrossing( ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) { -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: [[CALL:%.*]] = call <2 x i8> @fn(<2 x i8> [[Y]]) ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[CALL]], <2 x i8> [[X]] ; CHECK-NEXT: ret <2 x i8> [[RETVAL]] @@ -201,7 +201,7 @@ define float @select_fcmp_fadd_une_zero(float %x, float %y) { define <2 x float> @select_fcmp_fadd_oeq_not_zero_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_oeq_not_zero_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp oeq <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp oeq <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[FADD]], <2 x float> [[X]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -215,7 +215,7 @@ define <2 x float> @select_fcmp_fadd_oeq_not_zero_vec(<2 x float> %x, <2 x float define <2 x float> @select_fcmp_fadd_une_not_zero_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_une_not_zero_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp une <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp une <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[X]], <2 x float> [[FADD]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -229,7 +229,7 @@ define <2 x float> @select_fcmp_fadd_une_not_zero_vec(<2 x float> %x, <2 x float define <2 x float> @select_fcmp_fadd_ueq_nnan_not_zero_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_ueq_nnan_not_zero_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp nnan ueq <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp nnan ueq <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[FADD]], <2 x float> [[X]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -243,7 +243,7 @@ define <2 x float> @select_fcmp_fadd_ueq_nnan_not_zero_vec(<2 x float> %x, <2 x define <2 x float> @select_fcmp_fadd_one_nnan_not_zero_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_one_nnan_not_zero_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp nnan one <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp nnan one <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[X]], <2 x float> [[FADD]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -257,7 +257,7 @@ define <2 x float> @select_fcmp_fadd_one_nnan_not_zero_vec(<2 x float> %x, <2 x define <2 x float> @select_fcmp_fadd_ueq_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_ueq_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp ueq <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp ueq <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[FADD]], <2 x float> [[X]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] @@ -271,7 +271,7 @@ define <2 x float> @select_fcmp_fadd_ueq_vec(<2 x float> %x, <2 x float> %y) { define <2 x float> @select_fcmp_fadd_one_vec(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: define <2 x float> @select_fcmp_fadd_one_vec( ; CHECK-SAME: <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { -; CHECK-NEXT: [[FCMP:%.*]] = fcmp one <2 x float> [[Y]], +; CHECK-NEXT: [[FCMP:%.*]] = fcmp one <2 x float> [[Y]], splat (float 2.000000e+00) ; CHECK-NEXT: [[FADD:%.*]] = fadd <2 x float> [[X]], [[Y]] ; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[FCMP]], <2 x float> [[X]], <2 x float> [[FADD]] ; CHECK-NEXT: ret <2 x float> [[RETVAL]] diff --git a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll index 1647233595b37e..30e763ccea5900 100644 --- a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll +++ b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll @@ -22,8 +22,8 @@ define i32 @select_icmp_eq_and_1_0_or_2(i32 %x, i32 %y) { define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec( -; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -37,7 +37,7 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison1(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec_poison1( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw <2 x i32> [[AND]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw <2 x i32> [[AND]], splat (i32 1) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -50,8 +50,8 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison1(<2 x i32> %x, <2 x i32 define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison2(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec_poison2( -; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -64,8 +64,8 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison2(<2 x i32> %x, <2 x i32 define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison3(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec_poison3( -; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -121,8 +121,8 @@ define i32 @select_icmp_eq_and_32_0_or_8(i32 %x, i32 %y) { define <2 x i32> @select_icmp_eq_and_32_0_or_8_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_32_0_or_8_vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 2) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 8) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -178,8 +178,8 @@ define i32 @select_icmp_ne_0_and_4096_or_4096(i32 %x, i32 %y) { define <2 x i32> @select_icmp_ne_0_and_4096_or_4096_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_4096_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 4096) +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], splat (i32 4096) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -234,7 +234,7 @@ define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) { define <2 x i32> @select_icmp_eq_and_4096_0_or_4096_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_4096_0_or_4096_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 4096) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[AND]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -290,7 +290,7 @@ define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) { define <2 x i32> @select_icmp_eq_0_and_1_or_1_vec(<2 x i64> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1_vec( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -392,9 +392,9 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) { define <2 x i32> @select_icmp_ne_0_and_32_or_4096_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_ne_0_and_32_or_4096_vec( -; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 7) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 4096) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 4096) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -531,7 +531,7 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) { ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_vector_of_2s( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[Y:%.*]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x i32> [[Y]], <2 x i32> [[OR]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -612,8 +612,8 @@ define i64 @select_icmp_x_and_8_ne_0_y_or_8(i32 %x, i64 %y) { define <2 x i64> @select_icmp_x_and_8_ne_0_y_or_8_vec(<2 x i32> %x, <2 x i64> %y) { ; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], splat (i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[OR_Y:%.*]] = or <2 x i64> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: ret <2 x i64> [[OR_Y]] @@ -692,8 +692,8 @@ define i32 @test68(i32 %x, i32 %y) { define <2 x i32> @test68vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test68vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 6) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -750,9 +750,9 @@ define i32 @test69(i32 %x, i32 %y) { define <2 x i32> @test69vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test69vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 6) +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: ret <2 x i32> [[SELECT]] ; @@ -1563,8 +1563,8 @@ define i8 @set_bits_extra_use2(i8 %x, i1 %b) { define <2 x i8> @clear_bits(<2 x i8> %x, <2 x i1> %b) { ; CHECK-LABEL: @clear_bits( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[MASKSEL:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 37) +; CHECK-NEXT: [[MASKSEL:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 -38) ; CHECK-NEXT: [[COND:%.*]] = or disjoint <2 x i8> [[AND]], [[MASKSEL]] ; CHECK-NEXT: ret <2 x i8> [[COND]] ; @@ -1579,7 +1579,7 @@ define <2 x i8> @clear_bits(<2 x i8> %x, <2 x i1> %b) { define <2 x i8> @clear_bits_not_inverse_constant(<2 x i8> %x, <2 x i1> %b) { ; CHECK-LABEL: @clear_bits_not_inverse_constant( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[X]], splat (i8 -38) ; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> [[AND]], <2 x i8> [[OR]] ; CHECK-NEXT: ret <2 x i8> [[COND]] ; @@ -1591,9 +1591,9 @@ define <2 x i8> @clear_bits_not_inverse_constant(<2 x i8> %x, <2 x i1> %b) { define <2 x i8> @clear_bits_extra_use1(<2 x i8> %x, i1 %b) { ; CHECK-LABEL: @clear_bits_extra_use1( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 37) ; CHECK-NEXT: call void @use_vec(<2 x i8> [[AND]]) -; CHECK-NEXT: [[MASKSEL:%.*]] = select i1 [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> +; CHECK-NEXT: [[MASKSEL:%.*]] = select i1 [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 -38) ; CHECK-NEXT: [[COND:%.*]] = or disjoint <2 x i8> [[AND]], [[MASKSEL]] ; CHECK-NEXT: ret <2 x i8> [[COND]] ; diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 7d62b419424405..8b394afc70aca5 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -34,7 +34,7 @@ define i1 @trueval_is_true(i1 %C, i1 %X) { define <2 x i1> @trueval_is_true_vec(<2 x i1> %C, <2 x i1> %X) { ; CHECK-LABEL: @trueval_is_true_vec( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> , <2 x i1> [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[X:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %R = select <2 x i1> %C, <2 x i1> , <2 x i1> %X @@ -89,7 +89,7 @@ define i1 @test9(i1 %C, i1 %X) { define <2 x i1> @test9vec(<2 x i1> %C, <2 x i1> %X) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], +; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_C]], <2 x i1> [[X:%.*]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -119,8 +119,8 @@ define i1 @test10(i1 %C, i1 %X) { define <2 x i1> @test10vec(<2 x i1> %C, <2 x i1> %X) { ; CHECK-LABEL: @test10vec( -; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_C]], <2 x i1> , <2 x i1> [[X:%.*]] +; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], splat (i1 true) +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[NOT_C]], <2 x i1> splat (i1 true), <2 x i1> [[X:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %R = select <2 x i1> %C, <2 x i1> %X, <2 x i1> @@ -156,7 +156,7 @@ define i1 @test24(i1 %a, i1 %b) { define <2 x i1> @test24vec(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @test24vec( -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; %c = select <2 x i1> %a, <2 x i1> %a, <2 x i1> %b @@ -176,7 +176,7 @@ define i1 @test62(i1 %A, i1 %B) { define <2 x i1> @test62vec(<2 x i1> %A, <2 x i1> %B) { ; CHECK-LABEL: @test62vec( -; CHECK-NEXT: [[NOT_A:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT_A:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[NOT_A]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -198,8 +198,8 @@ define i1 @test63(i1 %A, i1 %B) { define <2 x i1> @test63vec(<2 x i1> %A, <2 x i1> %B) { ; CHECK-LABEL: @test63vec( -; CHECK-NEXT: [[NOT_A:%.*]] = xor <2 x i1> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[NOT_A]], <2 x i1> , <2 x i1> [[B:%.*]] +; CHECK-NEXT: [[NOT_A:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) +; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[NOT_A]], <2 x i1> splat (i1 true), <2 x i1> [[B:%.*]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; %not = xor <2 x i1> %A, @@ -276,7 +276,7 @@ define i32 @test12b(i1 %cond, i32 %a) { define <2 x i32> @test12bvec(<2 x i1> %cond, <2 x i32> %a) { ; CHECK-LABEL: @test12bvec( -; CHECK-NEXT: [[NOT_COND:%.*]] = xor <2 x i1> [[COND:%.*]], +; CHECK-NEXT: [[NOT_COND:%.*]] = xor <2 x i1> [[COND:%.*]], splat (i1 true) ; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[NOT_COND]] to <2 x i32> ; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[A:%.*]], [[B]] ; CHECK-NEXT: ret <2 x i32> [[D]] @@ -1447,7 +1447,7 @@ define i32 @select_icmp_slt0_xor(i32 %x) { define <2 x i32> @select_icmp_slt0_xor_vec(<2 x i32> %x) { ; CHECK-LABEL: @select_icmp_slt0_xor_vec( -; CHECK-NEXT: [[X_XOR:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X_XOR:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[X_XOR]] ; %cmp = icmp slt <2 x i32> %x, zeroinitializer @@ -1531,7 +1531,7 @@ define <4 x float> @PR33721(<4 x float> %w) { ; CHECK-LABEL: @PR33721( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <4 x float> [[W:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> , <4 x float> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> splat (float 0xFFFFFFFFE0000000), <4 x float> zeroinitializer ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; entry: @@ -1603,7 +1603,7 @@ define i32 @test_shl_zext_bool(i1 %t) { define <2 x i32> @test_shl_zext_bool_splat(<2 x i1> %t) { ; CHECK-LABEL: @test_shl_zext_bool_splat( -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> splat (i32 8), <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[R]] ; %r = select <2 x i1> %t, <2 x i32> , <2 x i32> zeroinitializer @@ -1644,7 +1644,7 @@ define float @copysign1_fmf(float %x) { define <2 x float> @copysign2(<2 x float> %x) { ; CHECK-LABEL: @copysign2( ; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[TMP1]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[TMP1]]) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to <2 x i32> @@ -1668,7 +1668,7 @@ define float @copysign3(float %x) { define <2 x float> @copysign_vec_poison(<2 x float> %x) { ; CHECK-LABEL: @copysign_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[TMP1]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[TMP1]]) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to <2 x i32> @@ -1679,7 +1679,7 @@ define <2 x float> @copysign_vec_poison(<2 x float> %x) { define <2 x float> @copysign_vec_poison1(<2 x float> %x) { ; CHECK-LABEL: @copysign_vec_poison1( -; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to <2 x i32> @@ -1690,7 +1690,7 @@ define <2 x float> @copysign_vec_poison1(<2 x float> %x) { define <2 x float> @copysign_vec_poison3(<2 x float> %x) { ; CHECK-LABEL: @copysign_vec_poison3( -; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to <2 x i32> @@ -1739,7 +1739,7 @@ define <2 x float> @copysign_type_mismatch2(<2 x float> %x) { ; CHECK-LABEL: @copysign_type_mismatch2( ; CHECK-NEXT: [[I:%.*]] = bitcast <2 x float> [[X:%.*]] to i64 ; CHECK-NEXT: [[ISPOS:%.*]] = icmp sgt i64 [[I]], -1 -; CHECK-NEXT: [[R:%.*]] = select i1 [[ISPOS]], <2 x float> , <2 x float> +; CHECK-NEXT: [[R:%.*]] = select i1 [[ISPOS]], <2 x float> splat (float 1.000000e+00), <2 x float> splat (float -1.000000e+00) ; CHECK-NEXT: ret <2 x float> [[R]] ; %i = bitcast <2 x float> %x to i64 @@ -2762,8 +2762,8 @@ define i8 @select_replacement_add_eq(i8 %x, i8 %y) { define <2 x i8> @select_replacement_add_eq_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @select_replacement_add_eq_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> , <2 x i8> [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 2), <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; %cmp = icmp eq <2 x i8> %x, @@ -2799,7 +2799,7 @@ define <2 x i8> @select_replacement_add_eq_vec_poison(<2 x i8> %x, <2 x i8> %y) define <2 x i8> @select_replacement_add_eq_vec_undef(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @select_replacement_add_eq_vec_undef( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], splat (i8 1) ; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; @@ -2811,7 +2811,7 @@ define <2 x i8> @select_replacement_add_eq_vec_undef(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @select_replacement_add_eq_vec_undef_okay(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @select_replacement_add_eq_vec_undef_okay( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 1) ; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> , <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; @@ -3370,7 +3370,7 @@ define i32 @select_cond_zext_cond(i1 %cond, i32 %b) { define <2 x i32> @select_cond_zext_cond_vec(<2 x i1> %cond, <2 x i32> %b) { ; CHECK-LABEL: @select_cond_zext_cond_vec( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> , <2 x i32> [[B:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> splat (i32 1), <2 x i32> [[B:%.*]] ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %zext = zext <2 x i1> %cond to <2 x i32> @@ -3390,7 +3390,7 @@ define i32 @select_cond_sext_cond(i1 %cond, i32 %b) { define <2 x i32> @select_cond_sext_cond_vec(<2 x i1> %cond, <2 x i32> %b) { ; CHECK-LABEL: @select_cond_sext_cond_vec( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> , <2 x i32> [[B:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> splat (i32 -1), <2 x i32> [[B:%.*]] ; CHECK-NEXT: ret <2 x i32> [[SEL]] ; %sext = sext <2 x i1> %cond to <2 x i32> @@ -3582,7 +3582,7 @@ define i32 @not_clamp_umin2(i32 %x) { define <2 x i8> @clamp_umaxval(<2 x i8> %x) { ; CHECK-LABEL: @clamp_umaxval( -; CHECK-NEXT: [[SEL:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[SEL:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 -2)) ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; %cmp = icmp eq <2 x i8> %x, @@ -3667,7 +3667,7 @@ define i8 @not_clamp_smin2(i8 %x) { define <2 x i8> @clamp_smaxval(<2 x i8> %x) { ; CHECK-LABEL: @clamp_smaxval( -; CHECK-NEXT: [[SEL:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[SEL:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 126)) ; CHECK-NEXT: ret <2 x i8> [[SEL]] ; %cmp = icmp eq <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll index d8f945b8d1b32f..f22b5a4fd3c5f2 100644 --- a/llvm/test/Transforms/InstCombine/select_meta.ll +++ b/llvm/test/Transforms/InstCombine/select_meta.ll @@ -371,7 +371,7 @@ define double @select_fmul(i1 %cond, double %x, double %y) { define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @select_fdiv( -; CHECK-NEXT: [[OP:%.*]] = select nnan i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> , !prof [[PROF0]], !unpredictable [[META2]] +; CHECK-NEXT: [[OP:%.*]] = select nnan i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> splat (float 1.000000e+00), !prof [[PROF0]], !unpredictable [[META2]] ; CHECK-NEXT: [[RET:%.*]] = fdiv <2 x float> [[X:%.*]], [[OP]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll index a3c8d3393d04fa..e6e1ea689521d8 100644 --- a/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll +++ b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll @@ -187,8 +187,8 @@ define i32 @shl_nsw_nuw_add_nsw_nuw(i32 %NBits) { define <2 x i32> @shl_add_vec(<2 x i32> %NBits) { ; CHECK-LABEL: @shl_add_vec( -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <2 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[RET:%.*]] = xor <2 x i32> [[NOTMASK]], +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <2 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[RET:%.*]] = xor <2 x i32> [[NOTMASK]], splat (i32 -1) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; %setbit = shl <2 x i32> , %NBits @@ -198,8 +198,8 @@ define <2 x i32> @shl_add_vec(<2 x i32> %NBits) { define <3 x i32> @shl_add_vec_poison0(<3 x i32> %NBits) { ; CHECK-LABEL: @shl_add_vec_poison0( -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], splat (i32 -1) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %setbit = shl <3 x i32> , %NBits @@ -209,8 +209,8 @@ define <3 x i32> @shl_add_vec_poison0(<3 x i32> %NBits) { define <3 x i32> @shl_add_vec_poison1(<3 x i32> %NBits) { ; CHECK-LABEL: @shl_add_vec_poison1( -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], splat (i32 -1) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %setbit = shl <3 x i32> , %NBits @@ -220,8 +220,8 @@ define <3 x i32> @shl_add_vec_poison1(<3 x i32> %NBits) { define <3 x i32> @shl_add_vec_poison2(<3 x i32> %NBits) { ; CHECK-LABEL: @shl_add_vec_poison2( -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> , [[NBITS:%.*]] -; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], +; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]] +; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], splat (i32 -1) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %setbit = shl <3 x i32> , %NBits diff --git a/llvm/test/Transforms/InstCombine/set.ll b/llvm/test/Transforms/InstCombine/set.ll index f44ac83f7f5916..214e9adb9bd545 100644 --- a/llvm/test/Transforms/InstCombine/set.ll +++ b/llvm/test/Transforms/InstCombine/set.ll @@ -144,7 +144,7 @@ define i1 @test13(i1 %A, i1 %B) { define <2 x i1> @test13vec(<2 x i1> %A, <2 x i1> %B) { ; CHECK-LABEL: @test13vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[B:%.*]], splat (i1 true) ; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -165,7 +165,7 @@ define i1 @test14(i1 %A, i1 %B) { define <3 x i1> @test14vec(<3 x i1> %A, <3 x i1> %B) { ; CHECK-LABEL: @test14vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[C:%.*]] = xor <3 x i1> [[TMP1]], +; CHECK-NEXT: [[C:%.*]] = xor <3 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: ret <3 x i1> [[C]] ; %C = icmp eq <3 x i1> %A, %B @@ -257,7 +257,7 @@ define i1 @xor_of_icmps_neg_to_ne(i64 %a) { define <2 x i1> @xor_of_icmps_to_ne_vector(<2 x i64> %a) { ; CHECK-LABEL: @xor_of_icmps_to_ne_vector( -; CHECK-NEXT: [[XOR:%.*]] = icmp ne <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[XOR:%.*]] = icmp ne <2 x i64> [[A:%.*]], splat (i64 5) ; CHECK-NEXT: ret <2 x i1> [[XOR]] ; %b = icmp sgt <2 x i64> %a, @@ -419,7 +419,7 @@ define i32 @test20(i32 %A) { define <2 x i32> @test20vec(<2 x i32> %A) { ; CHECK-LABEL: @test20vec( -; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %B = and <2 x i32> %A, @@ -442,8 +442,8 @@ define i32 @test21(i32 %a) { define <2 x i32> @test21vec(<2 x i32> %a) { ; CHECK-LABEL: @test21vec( -; CHECK-NEXT: [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], +; CHECK-NEXT: [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 2) +; CHECK-NEXT: [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP_6_LOBIT]] ; %tmp.6 = and <2 x i32> %a, @@ -490,8 +490,8 @@ define i32 @test23(i32 %a) { define <2 x i32> @test23vec(<2 x i32> %a) { ; CHECK-LABEL: @test23vec( -; CHECK-NEXT: [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP_3:%.*]] = xor <2 x i32> [[TMP_1]], +; CHECK-NEXT: [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1) +; CHECK-NEXT: [[TMP_3:%.*]] = xor <2 x i32> [[TMP_1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP_3]] ; %tmp.1 = and <2 x i32> %a, @@ -516,9 +516,9 @@ define i32 @test24(i32 %a) { define <2 x i32> @test24vec(<2 x i32> %a) { ; CHECK-LABEL: @test24vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[DOTLOBIT:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP_3:%.*]] = xor <2 x i32> [[DOTLOBIT]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 2) +; CHECK-NEXT: [[DOTLOBIT:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) +; CHECK-NEXT: [[TMP_3:%.*]] = xor <2 x i32> [[DOTLOBIT]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP_3]] ; %tmp1 = and <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll index b992460d0be698..516f1a2bfd5c53 100644 --- a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll +++ b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll @@ -55,7 +55,7 @@ define i16 @n2(i8 %x) { define <2 x i16> @t3_vec(<2 x i8> %x) { ; CHECK-LABEL: @t3_vec( -; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 4) ; CHECK-NEXT: call void @usevec(<2 x i8> [[A]]) ; CHECK-NEXT: [[C:%.*]] = sext <2 x i8> [[A]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[C]] diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll index a554f2b28d6f29..ee3c52259f930a 100644 --- a/llvm/test/Transforms/InstCombine/sext.ll +++ b/llvm/test/Transforms/InstCombine/sext.ll @@ -143,8 +143,8 @@ define i32 @test10(i32 %i) { define <2 x i32> @test10_vec(<2 x i32> %i) { ; CHECK-LABEL: @test10_vec( -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i32> [[I:%.*]], -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i32> [[D1]], +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i32> [[I:%.*]], splat (i32 30) +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i32> [[D1]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %A = trunc <2 x i32> %i to <2 x i8> @@ -338,8 +338,8 @@ define <2 x i32> @smear_set_bit_vec_use1(<2 x i32> %x) { ; CHECK-LABEL: @smear_set_bit_vec_use1( ; CHECK-NEXT: [[T:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i5> ; CHECK-NEXT: call void @use_vec(<2 x i5> [[T]]) -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X]], -; CHECK-NEXT: [[S:%.*]] = ashr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X]], splat (i32 27) +; CHECK-NEXT: [[S:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[S]] ; %t = trunc <2 x i32> %x to <2 x i5> diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll index 016f877a9efb51..81cbc2ac23b5f2 100644 --- a/llvm/test/Transforms/InstCombine/shift-add.ll +++ b/llvm/test/Transforms/InstCombine/shift-add.ll @@ -289,7 +289,7 @@ define i8 @lshr_exact_add_negative_shift_positive_extra_use(i8 %x) { define <2 x i9> @lshr_exact_add_negative_shift_positive_vec(<2 x i9> %x) { ; CHECK-LABEL: @lshr_exact_add_negative_shift_positive_vec( -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i9> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i9> splat (i9 -256), [[X:%.*]] ; CHECK-NEXT: ret <2 x i9> [[R]] ; %a = add <2 x i9> %x, @@ -301,8 +301,8 @@ define <2 x i9> @lshr_exact_add_negative_shift_positive_vec(<2 x i9> %x) { define <2 x i9> @lshr_exact_add_negative_shift_lzcnt(<2 x i9> %x) { ; CHECK-LABEL: @lshr_exact_add_negative_shift_lzcnt( -; CHECK-NEXT: [[A:%.*]] = add <2 x i9> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i9> , [[A]] +; CHECK-NEXT: [[A:%.*]] = add <2 x i9> [[X:%.*]], splat (i9 -7) +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i9> splat (i9 4), [[A]] ; CHECK-NEXT: ret <2 x i9> [[R]] ; %a = add <2 x i9> %x, @@ -372,7 +372,7 @@ define i8 @ashr_exact_add_negative_shift_negative_extra_use(i8 %x) { define <2 x i7> @ashr_exact_add_negative_shift_negative_vec(<2 x i7> %x) { ; CHECK-LABEL: @ashr_exact_add_negative_shift_negative_vec( -; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i7> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i7> splat (i7 -64), [[X:%.*]] ; CHECK-NEXT: ret <2 x i7> [[R]] ; %a = add <2 x i7> %x, @@ -384,8 +384,8 @@ define <2 x i7> @ashr_exact_add_negative_shift_negative_vec(<2 x i7> %x) { define <2 x i7> @ashr_exact_add_negative_leading_ones_vec(<2 x i7> %x) { ; CHECK-LABEL: @ashr_exact_add_negative_leading_ones_vec( -; CHECK-NEXT: [[A:%.*]] = add <2 x i7> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i7> , [[A]] +; CHECK-NEXT: [[A:%.*]] = add <2 x i7> [[X:%.*]], splat (i7 -5) +; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i7> splat (i7 -4), [[A]] ; CHECK-NEXT: ret <2 x i7> [[R]] ; %a = add <2 x i7> %x, @@ -410,9 +410,9 @@ define i32 @shl_nsw_add_negative(i32 %x) { define <2 x i8> @shl_nuw_add_negative_splat_uses(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @shl_nuw_add_negative_splat_uses( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -2) ; CHECK-NEXT: store <2 x i8> [[A]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i8> , [[X]] +; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i8> splat (i8 3), [[X]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = add <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll index c4260f4cb2bf88..c134833630338c 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll @@ -165,7 +165,7 @@ define i1 @n4(i32 %x, i32 %len) { ; New shift amount would be 16, minimal count of leading zeros in %x is 16. Ok. define <2 x i1> @t5_vec(<2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @t5_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], splat (i64 16) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], ; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T5]] @@ -183,9 +183,9 @@ define <2 x i1> @t5_vec(<2 x i64> %y, <2 x i32> %len) { ; New shift amount would be 16, minimal count of leading zeros in %x is 15, not ok to fold. define <2 x i1> @n6_vec(<2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @n6_vec( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> splat (i32 32), [[LEN:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> , [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], splat (i32 -16) ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64> ; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]] ; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32> @@ -224,9 +224,9 @@ define <2 x i1> @t7_vec(<2 x i32> %x, <2 x i32> %len) { ; New shift amount would be 16, minimal count of leading zeros in %x is 48, not ok to fold. define <2 x i1> @n8_vec(<2 x i32> %x, <2 x i32> %len) { ; CHECK-LABEL: @n8_vec( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> splat (i32 32), [[LEN:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], splat (i32 -16) ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64> ; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i64> , [[T2_WIDE]] ; CHECK-NEXT: [[T3_TRUNC:%.*]] = trunc nuw nsw <2 x i64> [[T3]] to <2 x i32> @@ -335,7 +335,7 @@ define i1 @t10_shift_by_one(i32 %x, i64 %y, i32 %len) { ; A mix of those conditions is ok. define <2 x i1> @t11_zero_and_almost_bitwidth(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @t11_zero_and_almost_bitwidth( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> splat (i32 64), [[LEN:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] ; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64> @@ -357,7 +357,7 @@ define <2 x i1> @t11_zero_and_almost_bitwidth(<2 x i32> %x, <2 x i64> %y, <2 x i } define <2 x i1> @n12_bad(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @n12_bad( -; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> , [[LEN:%.*]] +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i32> splat (i32 64), [[LEN:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]] ; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[LEN]], ; CHECK-NEXT: [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64> @@ -414,7 +414,7 @@ define i1 @t14_x_is_one(i32 %x, i32 %len) { define <2 x i1> @t15_vec_x_is_one_or_zero(<2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @t15_vec_x_is_one_or_zero( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], splat (i64 48) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], ; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T5]] diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll index 6e9552e2af4cce..6f4e78e9f91a0f 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll @@ -38,7 +38,7 @@ define i1 @t0_const_after_fold_lshr_shl_ne(i32 %x, i64 %y, i32 %len) { define <2 x i1> @t1_vec_splat(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: [[T5:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer @@ -363,7 +363,7 @@ define i1 @t10_constants(i32 %x, i64 %y) { define <2 x i1> @t11_constants_vec_splat(<2 x i32> %x, <2 x i64> %y) { ; CHECK-LABEL: @t11_constants_vec_splat( ; CHECK-NEXT: [[Y_TR:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 26) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y_TR]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T3]] @@ -378,7 +378,7 @@ define <2 x i1> @t11_constants_vec_splat(<2 x i32> %x, <2 x i64> %y) { define <2 x i1> @t12_constants_vec_nonsplat(<2 x i32> %x, <2 x i64> %y) { ; CHECK-LABEL: @t12_constants_vec_nonsplat( ; CHECK-NEXT: [[Y_TR:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 28) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y_TR]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T3]] diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll index 00a19e4962e6ca..070a3b03302124 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll @@ -119,7 +119,7 @@ define i1 @t6_const_shl_lshr_ne(i32 %x, i32 %y, i32 %shamt0, i32 %shamt1) { define <2 x i1> @t7_const_lshr_shl_ne_vec_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t7_const_lshr_shl_ne_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[T3]] @@ -171,7 +171,7 @@ define <3 x i1> @t10_const_lshr_shl_ne_vec_poison1(<3 x i32> %x, <3 x i32> %y) { } define <3 x i1> @t11_const_lshr_shl_ne_vec_poison2(<3 x i32> %x, <3 x i32> %y) { ; CHECK-LABEL: @t11_const_lshr_shl_ne_vec_poison2( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[T3]] diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll index 9efc30cc9d916e..78ac4cbabde7d9 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll @@ -29,7 +29,7 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[T5:%.*]] = trunc nsw <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[T5]] ; diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll index c31b6ed3ea2ba9..937345b00c0339 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll @@ -29,7 +29,7 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: [[T5:%.*]] = trunc nuw nsw <2 x i32> [[TMP1]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[T5]] ; diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll index 073013b34a3baa..0491ce1def8488 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll @@ -30,7 +30,7 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t1_vec_splat( ; CHECK-NEXT: [[X_TR:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16> -; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[X_TR]], +; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[X_TR]], splat (i16 8) ; CHECK-NEXT: ret <2 x i16> [[T5]] ; %t0 = sub <2 x i16> , %y diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll index 6bbe4c5151e458..b4c606f037d561 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll @@ -24,7 +24,7 @@ define i32 @t0(i32 %x, i32 %y) { define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t0 = sub <2 x i32> , %y @@ -36,7 +36,7 @@ define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @t2_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @t2_vec_nonsplat( -; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 30) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t0 = sub <2 x i32> , %y diff --git a/llvm/test/Transforms/InstCombine/shift-logic.ll b/llvm/test/Transforms/InstCombine/shift-logic.ll index 593a22bec6490b..ab8d98a9523ba8 100644 --- a/llvm/test/Transforms/InstCombine/shift-logic.ll +++ b/llvm/test/Transforms/InstCombine/shift-logic.ll @@ -46,7 +46,7 @@ define i16 @shl_or(i16 %x, i16 %py) { define <2 x i16> @shl_or_poison(<2 x i16> %x, <2 x i16> %py) { ; CHECK-LABEL: @shl_or_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i16> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i16> [[PY:%.*]], splat (i16 42) ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[X:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <2 x i16> [[Y]], ; CHECK-NEXT: [[SH1:%.*]] = or <2 x i16> [[TMP1]], [[TMP2]] @@ -102,7 +102,7 @@ define i64 @lshr_and(i64 %x, i64 %py) { define <2 x i64> @lshr_and_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @lshr_and_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[Y]], ; CHECK-NEXT: [[SH1:%.*]] = and <2 x i64> [[TMP1]], [[TMP2]] @@ -117,8 +117,8 @@ define <2 x i64> @lshr_and_poison(<2 x i64> %x, <2 x i64> %py) { define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @lshr_or( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], splat (i32 12) +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[Y:%.*]], splat (i32 7) ; CHECK-NEXT: [[SH1:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[SH1]] ; @@ -130,9 +130,9 @@ define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) { define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %py) { ; CHECK-LABEL: @lshr_xor( -; CHECK-NEXT: [[Y:%.*]] = srem <8 x i16> [[PY:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[Y]], +; CHECK-NEXT: [[Y:%.*]] = srem <8 x i16> [[PY:%.*]], splat (i16 42) +; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[X:%.*]], splat (i16 12) +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[Y]], splat (i16 7) ; CHECK-NEXT: [[SH1:%.*]] = xor <8 x i16> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <8 x i16> [[SH1]] ; @@ -146,8 +146,8 @@ define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %py) { define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) { ; CHECK-LABEL: @ashr_and( ; CHECK-NEXT: [[Y:%.*]] = srem <16 x i8> [[PY:%.*]], [[PZ:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i8> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i8> [[X:%.*]], splat (i8 5) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i8> [[Y]], splat (i8 2) ; CHECK-NEXT: [[SH1:%.*]] = and <16 x i8> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <16 x i8> [[SH1]] ; @@ -160,8 +160,8 @@ define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) { define <2 x i64> @ashr_or(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @ashr_or( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i64> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[X:%.*]], splat (i64 12) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i64> [[Y:%.*]], splat (i64 7) ; CHECK-NEXT: [[SH1:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i64> [[SH1]] ; @@ -281,7 +281,7 @@ define i64 @lshr_mul_nuw_nsw(i64 %0) { define <4 x i32> @lshr_mul_vector(<4 x i32> %0) { ; CHECK-LABEL: @lshr_mul_vector( -; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i32> [[TMP0:%.*]], splat (i32 13) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %2 = mul nuw <4 x i32> %0, @@ -394,7 +394,7 @@ define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) { define <2 x i64> @shl_add_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @shl_add_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <2 x i64> [[Y]], ; CHECK-NEXT: [[SH1:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] @@ -436,7 +436,7 @@ define <2 x i8> @lshr_add_nonuniform(<2 x i8> %x, <2 x i8> %y) { define <2 x i64> @lshr_add_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @lshr_add_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i64> [[Y]], [[SH0]] ; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i64> [[R]], @@ -492,7 +492,7 @@ define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) { define <2 x i64> @shl_sub_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @shl_sub_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <2 x i64> [[Y]], ; CHECK-NEXT: [[SH1:%.*]] = sub <2 x i64> [[TMP2]], [[TMP1]] @@ -534,7 +534,7 @@ define <2 x i8> @lshr_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) { define <2 x i64> @lshr_sub_poison(<2 x i64> %x, <2 x i64> %py) { ; CHECK-LABEL: @lshr_sub_poison( -; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], +; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42) ; CHECK-NEXT: [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i64> [[Y]], [[SH0]] ; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i64> [[R]], diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll index 7c35718601ba7f..fa58de3528d37b 100644 --- a/llvm/test/Transforms/InstCombine/shift-shift.ll +++ b/llvm/test/Transforms/InstCombine/shift-shift.ll @@ -486,7 +486,7 @@ define i8 @shl_lshr_demand4(i8 %x) { define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> splat (i8 37), [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; @@ -501,7 +501,7 @@ define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) { define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5_undef_left( ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], splat (i8 2) ; CHECK-NEXT: [[R:%.*]] = trunc nuw <2 x i8> [[LSHR]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] ; @@ -515,7 +515,7 @@ define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) { define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5_undef_right( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> splat (i8 -108), [[X:%.*]] ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] @@ -530,7 +530,7 @@ define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) { define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) { ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_left( -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> splat (i8 -108), [[X:%.*]] ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6> ; CHECK-NEXT: ret <2 x i6> [[R]] @@ -649,8 +649,8 @@ define i8 @lshr_shl_demand4(i8 %x) { define <2 x i8> @lshr_shl_demand5(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> splat (i8 -76), [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101 @@ -663,9 +663,9 @@ define <2 x i8> @lshr_shl_demand5(<2 x i8> %x) { define <2 x i8> @lshr_shl_demand5_undef_left(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5_undef_left( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> splat (i8 45), [[X:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[SHR]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101 @@ -679,8 +679,8 @@ define <2 x i8> @lshr_shl_demand5_undef_left(<2 x i8> %x) { define <2 x i8> @lshr_shl_demand5_undef_right(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5_undef_right( ; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[SHR]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[SHR]], splat (i8 2) +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101 @@ -693,9 +693,9 @@ define <2 x i8> @lshr_shl_demand5_undef_right(<2 x i8> %x) { define <2 x i8> @lshr_shl_demand5_nonuniform_vec_left(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5_nonuniform_vec_left( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> splat (i8 45), [[X:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[SHR]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101 @@ -709,7 +709,7 @@ define <2 x i8> @lshr_shl_demand5_nonuniform_vec_left(<2 x i8> %x) { define <2 x i8> @lshr_shl_demand5_nonuniform_vec_right(<2 x i8> %x) { ; CHECK-LABEL: @lshr_shl_demand5_nonuniform_vec_right( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[TMP1]], splat (i8 108) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %shr = lshr <2 x i8> , %x ; 0b0010_1101. 0b0000_1101 diff --git a/llvm/test/Transforms/InstCombine/shift-sra.ll b/llvm/test/Transforms/InstCombine/shift-sra.ll index 82bce5f02554ee..d631da0d444adb 100644 --- a/llvm/test/Transforms/InstCombine/shift-sra.ll +++ b/llvm/test/Transforms/InstCombine/shift-sra.ll @@ -143,7 +143,7 @@ define i32 @ashr_overshift(i32 %x) { define <2 x i32> @ashr_ashr_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @ashr_ashr_splat_vec( -; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 12) ; CHECK-NEXT: ret <2 x i32> [[SH2]] ; %sh1 = ashr <2 x i32> %x, @@ -155,7 +155,7 @@ define <2 x i32> @ashr_ashr_splat_vec(<2 x i32> %x) { define <2 x i32> @ashr_overshift_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @ashr_overshift_splat_vec( -; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[SH2]] ; %sh1 = ashr <2 x i32> %x, @@ -180,7 +180,7 @@ define i32 @hoist_ashr_ahead_of_sext_1(i8 %x) { define <2 x i32> @hoist_ashr_ahead_of_sext_1_splat(<2 x i8> %x) { ; CHECK-LABEL: @hoist_ashr_ahead_of_sext_1_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -206,7 +206,7 @@ define i32 @hoist_ashr_ahead_of_sext_2(i8 %x) { define <2 x i32> @hoist_ashr_ahead_of_sext_2_splat(<2 x i8> %x) { ; CHECK-LABEL: @hoist_ashr_ahead_of_sext_2_splat( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 986e1073c63891..d2ee97f39123b0 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -235,8 +235,8 @@ define i1 @test17(i32 %A) { define <2 x i1> @test17vec(<2 x i32> %A) { ; CHECK-LABEL: @test17vec( -; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], +; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -8) +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], splat (i32 9872) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = lshr <2 x i32> %A, @@ -267,7 +267,7 @@ define i1 @test19(i32 %A) { define <2 x i1> @test19vec(<2 x i32> %A) { ; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i32> %A, @@ -288,7 +288,7 @@ define i1 @test19a(i32 %A) { define <2 x i1> @test19a_vec(<2 x i32> %A) { ; CHECK-LABEL: @test19a_vec( -; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 -5) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = ashr <2 x i32> %A, @@ -433,7 +433,7 @@ entry: define <2 x i32> @test29_uniform(<2 x i64> %d18) { ; CHECK-LABEL: @test29_uniform( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i64> [[D18:%.*]], +; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <2 x i64> [[D18:%.*]], splat (i64 63) ; CHECK-NEXT: [[I101:%.*]] = trunc nuw nsw <2 x i64> [[SUM_SHIFT]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[I101]] ; @@ -523,7 +523,7 @@ define i1 @test33(i32 %X) { define <2 x i1> @test33vec(<2 x i32> %X) { ; CHECK-LABEL: @test33vec( -; CHECK-NEXT: [[I1_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[I1_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 16777216) ; CHECK-NEXT: [[I2:%.*]] = icmp ne <2 x i32> [[I1_MASK]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[I2]] ; @@ -594,7 +594,7 @@ define i64 @test37(i128 %A, i32 %B) { define <2 x i32> @shl_nuw_nsw_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <2 x i32> [[T2]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <2 x i32> [[T2]], splat (i32 17) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t2 = zext <2 x i8> %x to <2 x i32> @@ -615,8 +615,8 @@ define i32 @test38(i32 %x) { define <2 x i32> @test38_uniform(<2 x i32> %x) { ; CHECK-LABEL: @test38_uniform( -; CHECK-NEXT: [[REM1:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl nuw <2 x i32> , [[REM1]] +; CHECK-NEXT: [[REM1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 31) +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <2 x i32> splat (i32 1), [[REM1]] ; CHECK-NEXT: ret <2 x i32> [[SHL]] ; %rem = srem <2 x i32> %x, @@ -627,7 +627,7 @@ define <2 x i32> @test38_uniform(<2 x i32> %x) { define <3 x i32> @test38_nonuniform(<3 x i32> %x) { ; CHECK-LABEL: @test38_nonuniform( ; CHECK-NEXT: [[REM1:%.*]] = and <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl nuw <3 x i32> , [[REM1]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <3 x i32> splat (i32 1), [[REM1]] ; CHECK-NEXT: ret <3 x i32> [[SHL]] ; %rem = srem <3 x i32> %x, @@ -685,7 +685,7 @@ define i32 @test42(i32 %a, i32 %b) { define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test42vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> splat (i32 12), [[B:%.*]] ; CHECK-NEXT: [[DIV2:%.*]] = lshr <2 x i32> [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[DIV2]] ; @@ -767,7 +767,7 @@ define i32 @test46(i32 %a) { define <2 x i32> @test46_splat_vec(<2 x i32> %a) { ; CHECK-LABEL: @test46_splat_vec( -; CHECK-NEXT: [[Z:%.*]] = ashr exact <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[Z:%.*]] = ashr exact <2 x i32> [[A:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %y = ashr exact <2 x i32> %a, @@ -791,7 +791,7 @@ define i8 @test47(i8 %a) { define <2 x i8> @test47_splat_vec(<2 x i8> %a) { ; CHECK-LABEL: @test47_splat_vec( -; CHECK-NEXT: [[Z:%.*]] = lshr exact <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[Z:%.*]] = lshr exact <2 x i8> [[A:%.*]], splat (i8 2) ; CHECK-NEXT: ret <2 x i8> [[Z]] ; %y = lshr exact <2 x i8> %a, @@ -827,7 +827,7 @@ define i32 @test48_nuw_nsw(i32 %x) { define <2 x i32> @test48_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test48_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = lshr exact <2 x i32> %x, @@ -863,7 +863,7 @@ define i32 @test49_nuw_nsw(i32 %x) { define <2 x i32> @test49_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test49_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = ashr exact <2 x i32> %x, @@ -888,7 +888,7 @@ define i32 @test50(i32 %x) { define <2 x i32> @test50_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test50_splat_vec( -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nsw <2 x i32> %x, @@ -913,7 +913,7 @@ define i32 @test51(i32 %x) { define <2 x i32> @test51_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test51_splat_vec( -; CHECK-NEXT: [[B:%.*]] = lshr exact <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = lshr exact <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nuw <2 x i32> %x, @@ -939,8 +939,8 @@ define i32 @test51_no_nuw(i32 %x) { define <2 x i32> @test51_no_nuw_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test51_no_nuw_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 2) +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[TMP1]], splat (i32 536870911) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl <2 x i32> %x, @@ -964,7 +964,7 @@ define i32 @test52(i32 %x) { define <2 x i32> @test52_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test52_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl nsw <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nsw <2 x i32> %x, @@ -988,7 +988,7 @@ define i32 @test53(i32 %x) { define <2 x i32> @test53_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test53_splat_vec( -; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], splat (i32 2) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %A = shl nuw <2 x i32> %x, @@ -1013,8 +1013,8 @@ define i8 @test53_no_nuw(i8 %x) { define <2 x i8> @test53_no_nuw_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @test53_no_nuw_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 2) +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], splat (i8 124) ; CHECK-NEXT: ret <2 x i8> [[B]] ; %A = shl <2 x i8> %x, @@ -1036,8 +1036,8 @@ define i32 @test54(i32 %x) { define <2 x i32> @test54_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test54_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 3) +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[TMP1]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[AND]] ; %shr2 = lshr <2 x i32> %x, @@ -1098,8 +1098,8 @@ define i32 @test58(i32 %x) { define <2 x i32> @test58_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @test58_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 3) +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[OR]] ; %shr = ashr <2 x i32> %x, @@ -1213,8 +1213,8 @@ define <4 x i32> @test62_non_splat_vector(<4 x i32> %a) { define <2 x i65> @test_63(<2 x i64> %t) { ; CHECK-LABEL: @test_63( ; CHECK-NEXT: [[A:%.*]] = zext <2 x i64> [[T:%.*]] to <2 x i65> -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i65> [[A]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i65> [[SEXT]], +; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i65> [[A]], splat (i65 33) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i65> [[SEXT]], splat (i65 33) ; CHECK-NEXT: ret <2 x i65> [[B]] ; %a = zext <2 x i64> %t to <2 x i65> @@ -1235,7 +1235,7 @@ define i32 @test_shl_zext_bool(i1 %t) { define <2 x i32> @test_shl_zext_bool_splat(<2 x i1> %t) { ; CHECK-LABEL: @test_shl_zext_bool_splat( -; CHECK-NEXT: [[SHL:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[SHL:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> splat (i32 8), <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x i32> [[SHL]] ; %ext = zext <2 x i1> %t to <2 x i32> @@ -1293,7 +1293,7 @@ define i64 @shl_zext_extra_use(i32 %t) { define <2 x i64> @shl_zext_splat_vec(<2 x i32> %t) { ; CHECK-LABEL: @shl_zext_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[T:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[T:%.*]], splat (i32 8) ; CHECK-NEXT: [[SHL:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SHL]] ; @@ -1318,9 +1318,9 @@ define i64 @shl_zext_mul(i32 %t) { define <3 x i17> @shl_zext_mul_splat(<3 x i5> %t) { ; CHECK-LABEL: @shl_zext_mul_splat( -; CHECK-NEXT: [[MUL:%.*]] = mul <3 x i5> [[T:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <3 x i5> [[T:%.*]], splat (i5 13) ; CHECK-NEXT: [[EXT:%.*]] = zext <3 x i5> [[MUL]] to <3 x i17> -; CHECK-NEXT: [[SHL:%.*]] = shl nuw <3 x i17> [[EXT]], +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <3 x i17> [[EXT]], splat (i17 12) ; CHECK-NEXT: ret <3 x i17> [[SHL]] ; %mul = mul <3 x i5> %t, @@ -1374,7 +1374,7 @@ define i64 @shl_zext_mul_extra_use2(i32 %t) { define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) { ; CHECK-LABEL: @ashr_demanded_bits_splat( -; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[SHR]] ; %and = and <2 x i8> %x, @@ -1394,7 +1394,7 @@ define @ashr_demanded_bits_splat2( %x) { define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) { ; CHECK-LABEL: @lshr_demanded_bits_splat( -; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[SHR]] ; %and = and <2 x i8> %x, @@ -1913,7 +1913,7 @@ define i64 @lshr_mul_negpow2_2(i64 %x) { define <2 x i32> @lshr_mul_negpow2_3(<2 x i32> %x) { ; CHECK-LABEL: @lshr_mul_negpow2_3( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], splat (i32 255) ; CHECK-NEXT: ret <2 x i32> [[A]] ; %a = mul <2 x i32> %x, @@ -1937,8 +1937,8 @@ define i32 @lshr_mul_negpow2_4(i32 %x) { define <2 x i32> @lshr_mul_negpow2_5(<2 x i32> %x) { ; CHECK-LABEL: @lshr_mul_negpow2_5( ; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[B:%.*]] = or disjoint <2 x i32> [[A]], +; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[TMP1]], splat (i32 65527) +; CHECK-NEXT: [[B:%.*]] = or disjoint <2 x i32> [[A]], splat (i32 8) ; CHECK-NEXT: ret <2 x i32> [[B]] ; %a = mul <2 x i32> %x, @@ -1973,7 +1973,7 @@ define i8 @ashr_sdiv_pos(i8 %x) { define <2 x i8> @ashr_sdiv_neg_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @ashr_sdiv_neg_splat_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 41) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1984,7 +1984,7 @@ define <2 x i8> @ashr_sdiv_neg_splat_vec(<2 x i8> %x) { define <2 x i8> @ashr_sdiv_neg_splat_vec_poison(<2 x i8> %x) { ; CHECK-LABEL: @ashr_sdiv_neg_splat_vec_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -2119,7 +2119,7 @@ define i6 @shl_or7_eq_shl7(i6 %x, i6 %c) { define <2 x i8> @lshr_vec_or7_eq_shl7(<2 x i8> %x, <2 x i8> %c) { ; CHECK-LABEL: @lshr_vec_or7_eq_shl7( -; CHECK-NEXT: [[Y:%.*]] = lshr exact <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = lshr exact <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[Y]] ; %amt = or <2 x i8> %c, @@ -2130,7 +2130,7 @@ define <2 x i8> @lshr_vec_or7_eq_shl7(<2 x i8> %x, <2 x i8> %c) { define <2 x i8> @ashr_vec_or7_eq_ashr7(<2 x i8> %x, <2 x i8> %c) { ; CHECK-LABEL: @ashr_vec_or7_eq_ashr7( -; CHECK-NEXT: [[Y:%.*]] = ashr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7) ; CHECK-NEXT: ret <2 x i8> [[Y]] ; %amt = or <2 x i8> %c, @@ -2141,7 +2141,7 @@ define <2 x i8> @ashr_vec_or7_eq_ashr7(<2 x i8> %x, <2 x i8> %c) { ; Negative test not bitwidth - 1 define <2 x i8> @ashr_vec_or6_fail(<2 x i8> %x, <2 x i8> %c) { ; CHECK-LABEL: @ashr_vec_or6_fail( -; CHECK-NEXT: [[AMT:%.*]] = or <2 x i8> [[C:%.*]], +; CHECK-NEXT: [[AMT:%.*]] = or <2 x i8> [[C:%.*]], splat (i8 6) ; CHECK-NEXT: [[Y:%.*]] = ashr <2 x i8> [[X:%.*]], [[AMT]] ; CHECK-NEXT: ret <2 x i8> [[Y]] ; diff --git a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll index daa49557965943..197b9c80658e60 100644 --- a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll @@ -72,7 +72,7 @@ define i1 @scalar_i32_shl_and_negC_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_shl_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_negC_eq( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_shl_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_shl_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_shl_and_negC_eq_poison1( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -96,7 +96,7 @@ define <4 x i1> @vec_shl_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_shl_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_shl_and_negC_eq_poison2( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -108,7 +108,7 @@ define <4 x i1> @vec_shl_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_shl_and_negC_eq_poison3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_shl_and_negC_eq_poison3( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll index dcc181945357da..27542dad2fe37c 100644 --- a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll @@ -72,7 +72,7 @@ define i1 @scalar_i32_shl_and_signbit_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_shl_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison1( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -96,7 +96,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison2( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y @@ -108,7 +108,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison3( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1) ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/shl-bo.ll b/llvm/test/Transforms/InstCombine/shl-bo.ll index 356c4a288f9e31..c32ac2eacb25a1 100644 --- a/llvm/test/Transforms/InstCombine/shl-bo.ll +++ b/llvm/test/Transforms/InstCombine/shl-bo.ll @@ -20,10 +20,10 @@ define i8 @lshr_add(i8 %a, i8 %y) { define <2 x i8> @lshr_add_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_add_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 5) ; CHECK-NEXT: [[R2:%.*]] = add <2 x i8> [[Y:%.*]], [[B1]] -; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[R2]], +; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[R2]], splat (i8 -32) ; CHECK-NEXT: ret <2 x i8> [[L]] ; %x = srem <2 x i8> %a, ; thwart complexity-based canonicalization @@ -50,10 +50,10 @@ define i8 @lshr_sub(i8 %a, i8 %y) { define <2 x i8> @lshr_sub_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_sub_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 3) ; CHECK-NEXT: [[R2:%.*]] = sub <2 x i8> [[Y:%.*]], [[B1]] -; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[R2]], +; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[R2]], splat (i8 -8) ; CHECK-NEXT: ret <2 x i8> [[L]] ; %x = srem <2 x i8> %a, ; thwart complexity-based canonicalization @@ -79,8 +79,8 @@ define i8 @lshr_and(i8 %a, i8 %y) { define <2 x i8> @lshr_and_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 6) ; CHECK-NEXT: [[R2:%.*]] = and <2 x i8> [[Y:%.*]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[R2]] ; @@ -108,9 +108,9 @@ define i8 @lshr_or(i8 %a, i8 %y) { define <2 x i8> @lshr_or_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_or_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 4) +; CHECK-NEXT: [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 -16) ; CHECK-NEXT: [[L:%.*]] = or <2 x i8> [[Y_MASKED]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -138,9 +138,9 @@ define i8 @lshr_xor(i8 %a, i8 %y) { define <2 x i8> @lshr_xor_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_xor_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 3) +; CHECK-NEXT: [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 -8) ; CHECK-NEXT: [[L:%.*]] = xor <2 x i8> [[Y_MASKED]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -199,9 +199,9 @@ define i8 @lshr_and_add(i8 %a, i8 %y) { define <2 x i8> @lshr_and_add_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_add_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 3) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 96) ; CHECK-NEXT: [[L:%.*]] = add <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -232,9 +232,9 @@ define i8 @lshr_and_sub(i8 %a, i8 %y) { define <2 x i8> @lshr_and_sub_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_sub_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52) ; CHECK-NEXT: [[L:%.*]] = sub <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -264,9 +264,9 @@ define i8 @lshr_and_and(i8 %a, i8 %y) { define <2 x i8> @lshr_and_and_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_and_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52) ; CHECK-NEXT: [[L:%.*]] = and <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -329,9 +329,9 @@ define i8 @ashr_and_or_disjoint(i8 %a, i8 %y) { define <2 x i8> @lshr_and_or_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_or_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52) ; CHECK-NEXT: [[L:%.*]] = or <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -361,9 +361,9 @@ define i8 @lshr_and_xor(i8 %a, i8 %y) { define <2 x i8> @lshr_and_xor_commute_splat(<2 x i8> %a, <2 x i8> %y) { ; CHECK-LABEL: @lshr_and_xor_commute_splat( -; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], -; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42) +; CHECK-NEXT: [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2) +; CHECK-NEXT: [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52) ; CHECK-NEXT: [[L:%.*]] = xor <2 x i8> [[Y_MASK]], [[B1]] ; CHECK-NEXT: ret <2 x i8> [[L]] ; @@ -525,8 +525,8 @@ define i32 @lshr_add_and_shl(i32 %x, i32 %y) { define <2 x i32> @lshr_add_and_shl_v2i32(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @lshr_add_and_shl_v2i32( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], -; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 5) +; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 4064) ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[X_MASK]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; @@ -540,7 +540,7 @@ define <2 x i32> @lshr_add_and_shl_v2i32(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @lshr_add_and_shl_v2i32_undef(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @lshr_add_and_shl_v2i32_undef( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 127) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[Y:%.*]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i32> [[TMP3]], ; CHECK-NEXT: ret <2 x i32> [[TMP4]] @@ -583,8 +583,8 @@ define i32 @shl_add_and_lshr(i32 %x, i32 %y) { define <2 x i32> @shl_add_and_lshr_v2i32(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @shl_add_and_lshr_v2i32( -; CHECK-NEXT: [[C1:%.*]] = shl <2 x i32> [[Y:%.*]], -; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[C1:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 4) +; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128) ; CHECK-NEXT: [[D:%.*]] = add <2 x i32> [[X_MASK]], [[C1]] ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -647,8 +647,8 @@ define <8 x i16> @test_FoldShiftByConstant_CreateSHL2(<8 x i16> %in) { define <16 x i8> @test_FoldShiftByConstant_CreateAnd(<16 x i8> %in0) { ; CHECK-LABEL: @test_FoldShiftByConstant_CreateAnd( -; CHECK-NEXT: [[VSRA_N2:%.*]] = mul <16 x i8> [[IN0:%.*]], -; CHECK-NEXT: [[VSHL_N:%.*]] = and <16 x i8> [[VSRA_N2]], +; CHECK-NEXT: [[VSRA_N2:%.*]] = mul <16 x i8> [[IN0:%.*]], splat (i8 33) +; CHECK-NEXT: [[VSHL_N:%.*]] = and <16 x i8> [[VSRA_N2]], splat (i8 -32) ; CHECK-NEXT: ret <16 x i8> [[VSHL_N]] ; %vsra_n = ashr <16 x i8> %in0, diff --git a/llvm/test/Transforms/InstCombine/shl-demand.ll b/llvm/test/Transforms/InstCombine/shl-demand.ll index 08e6e745818489..00b681a9b4254e 100644 --- a/llvm/test/Transforms/InstCombine/shl-demand.ll +++ b/llvm/test/Transforms/InstCombine/shl-demand.ll @@ -95,9 +95,9 @@ define i32 @src_srem_shl_demand_max_mask_hit_demand(i32 %a0) { define <2 x i32> @src_srem_shl_mask_vector(<2 x i32> %a0) { ; CHECK-LABEL: @src_srem_shl_mask_vector( -; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shl nsw <2 x i32> [[SREM]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 4) +; CHECK-NEXT: [[SHL:%.*]] = shl nsw <2 x i32> [[SREM]], splat (i32 29) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], splat (i32 -1073741824) ; CHECK-NEXT: ret <2 x i32> [[MASK]] ; %srem = srem <2 x i32> %a0, @@ -108,9 +108,9 @@ define <2 x i32> @src_srem_shl_mask_vector(<2 x i32> %a0) { define <2 x i32> @src_srem_shl_mask_vector_nonconstant(<2 x i32> %a0, <2 x i32> %a1) { ; CHECK-LABEL: @src_srem_shl_mask_vector_nonconstant( -; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 4) ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[SREM]], [[A1:%.*]] -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], splat (i32 -1073741824) ; CHECK-NEXT: ret <2 x i32> [[MASK]] ; %srem = srem <2 x i32> %a0, diff --git a/llvm/test/Transforms/InstCombine/shl-sub.ll b/llvm/test/Transforms/InstCombine/shl-sub.ll index 962cf0562ae4e4..6cf60a76da9fb1 100644 --- a/llvm/test/Transforms/InstCombine/shl-sub.ll +++ b/llvm/test/Transforms/InstCombine/shl-sub.ll @@ -48,7 +48,7 @@ define i64 @shl_sub_i64(i64 %x) { define <2 x i64> @shl_sub_i64_vec(<2 x i64> %x) { ; CHECK-LABEL: @shl_sub_i64_vec( -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i64> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i64> splat (i64 -9223372036854775808), [[X:%.*]] ; CHECK-NEXT: ret <2 x i64> [[R]] ; %s = sub <2 x i64> , %x @@ -58,7 +58,7 @@ define <2 x i64> @shl_sub_i64_vec(<2 x i64> %x) { define <3 x i64> @shl_sub_i64_vec_poison(<3 x i64> %x) { ; CHECK-LABEL: @shl_sub_i64_vec_poison( -; CHECK-NEXT: [[R:%.*]] = lshr exact <3 x i64> , [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = lshr exact <3 x i64> splat (i64 -9223372036854775808), [[X:%.*]] ; CHECK-NEXT: ret <3 x i64> [[R]] ; %s = sub <3 x i64> , %x @@ -136,8 +136,8 @@ define i64 @shl_bad_sub_i64(i64 %x) { define <2 x i64> @shl_bad_sub_i64_vec(<2 x i64> %x) { ; CHECK-LABEL: @shl_bad_sub_i64_vec( -; CHECK-NEXT: [[S:%.*]] = sub <2 x i64> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i64> , [[S]] +; CHECK-NEXT: [[S:%.*]] = sub <2 x i64> splat (i64 53), [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i64> splat (i64 1), [[S]] ; CHECK-NEXT: ret <2 x i64> [[R]] ; %s = sub <2 x i64> , %x @@ -147,8 +147,8 @@ define <2 x i64> @shl_bad_sub_i64_vec(<2 x i64> %x) { define <2 x i64> @bad_shl_sub_i64_vec(<2 x i64> %x) { ; CHECK-LABEL: @bad_shl_sub_i64_vec( -; CHECK-NEXT: [[S:%.*]] = sub <2 x i64> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shl <2 x i64> , [[S]] +; CHECK-NEXT: [[S:%.*]] = sub <2 x i64> splat (i64 63), [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shl <2 x i64> splat (i64 2), [[S]] ; CHECK-NEXT: ret <2 x i64> [[R]] ; %s = sub <2 x i64> , %x @@ -159,7 +159,7 @@ define <2 x i64> @bad_shl_sub_i64_vec(<2 x i64> %x) { define <3 x i64> @shl_sub_i64_vec_undef_bad(<3 x i64> %x) { ; CHECK-LABEL: @shl_sub_i64_vec_undef_bad( ; CHECK-NEXT: [[S:%.*]] = sub <3 x i64> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shl nuw <3 x i64> , [[S]] +; CHECK-NEXT: [[R:%.*]] = shl nuw <3 x i64> splat (i64 1), [[S]] ; CHECK-NEXT: ret <3 x i64> [[R]] ; %s = sub <3 x i64> , %x @@ -191,8 +191,8 @@ define i32 @shl_const_op1_sub_const_op0(i32 %x) { define <2 x i32> @shl_const_op1_sub_const_op0_splat(<2 x i32> %x) { ; CHECK-LABEL: @shl_const_op1_sub_const_op0_splat( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 3) +; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> splat (i32 336), [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = sub <2 x i32> , %x diff --git a/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll b/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll index 25b26770c366db..6d6bcb3c46c465 100644 --- a/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll +++ b/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll @@ -116,7 +116,7 @@ define i1 @scalar_i8_shl_ugt_const(i8 %x) { define <4 x i1> @vector_4xi32_shl_ult_const(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ult_const( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -128,7 +128,7 @@ define <4 x i1> @vector_4xi32_shl_ult_const(<4 x i32> %x) { define <4 x i1> @vector_4xi32_shl_ult_const_undef1(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ult_const_undef1( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[SHL]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 131072) ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; %shl = shl <4 x i32> %x, @@ -138,7 +138,7 @@ define <4 x i1> @vector_4xi32_shl_ult_const_undef1(<4 x i32> %x) { define <4 x i1> @vector_4xi32_shl_ult_const_undef2(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ult_const_undef2( -; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], splat (i32 11) ; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[SHL]], ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -161,7 +161,7 @@ define <4 x i1> @vector_4xi32_shl_ult_const_undef3(<4 x i32> %x) { ; Check 'uge' predicate define <4 x i1> @vector_4xi32_shl_uge_const(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_uge_const( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -173,7 +173,7 @@ define <4 x i1> @vector_4xi32_shl_uge_const(<4 x i32> %x) { ; Check 'ule' predicate define <4 x i1> @vector_4xi32_shl_ule_const(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ule_const( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; @@ -185,7 +185,7 @@ define <4 x i1> @vector_4xi32_shl_ule_const(<4 x i32> %x) { ; Check 'ugt' predicate define <4 x i1> @vector_4xi32_shl_ugt_const(<4 x i32> %x) { ; CHECK-LABEL: @vector_4xi32_shl_ugt_const( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll index f573ff36d2cea4..3ed7fc2589d7a9 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll @@ -1392,7 +1392,7 @@ define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @add_or(<4 x i32> %v) { ; CHECK-LABEL: @add_or( -; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1407,7 +1407,7 @@ define <4 x i32> @add_or(<4 x i32> %v) { define <4 x i8> @or_add(<4 x i8> %v) { ; CHECK-LABEL: @or_add( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3) ; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], ; CHECK-NEXT: ret <4 x i8> [[T3]] ; @@ -1422,7 +1422,7 @@ define <4 x i8> @or_add(<4 x i8> %v) { define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) { ; CHECK-LABEL: @or_add_not_enough_masking( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 1) ; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]], ; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> @@ -1439,7 +1439,7 @@ define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) { define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) { ; CHECK-LABEL: @add_or_2_vars( -; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] @@ -1453,7 +1453,7 @@ define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) { define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) { ; CHECK-LABEL: @or_add_2_vars( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], ; CHECK-NEXT: ret <4 x i8> [[T3]] diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index 7516ea5c0a42cf..21765f74dad789 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -1455,7 +1455,7 @@ define <4 x i32> @neg_mul_2_vars(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @add_or(<4 x i32> %v) { ; CHECK-LABEL: @add_or( -; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1481,7 +1481,7 @@ define <4 x i32> @add_or_disjoint(<4 x i32> %v) { define <4 x i8> @or_add(<4 x i8> %v) { ; CHECK-LABEL: @or_add( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3) ; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], ; CHECK-NEXT: ret <4 x i8> [[T3]] ; @@ -1496,7 +1496,7 @@ define <4 x i8> @or_add(<4 x i8> %v) { define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) { ; CHECK-LABEL: @or_add_not_enough_masking( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 1) ; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]], ; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], ; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> @@ -1513,7 +1513,7 @@ define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) { define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) { ; CHECK-LABEL: @add_or_2_vars( -; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] @@ -1527,7 +1527,7 @@ define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) { define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) { ; CHECK-LABEL: @or_add_2_vars( -; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], +; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], ; CHECK-NEXT: ret <4 x i8> [[T3]] diff --git a/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll index 4d8481dd759ea6..30a4546790293f 100644 --- a/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll @@ -75,7 +75,7 @@ define i1 @scalar_i32_signbit_lshr_and_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_signbit_lshr_and_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_signbit_lshr_and_eq( -; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <4 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <4 x i32> splat (i32 -2147483648), [[Y:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[LSHR]], [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll index c97731777283d1..e43b31ad24fe1e 100644 --- a/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll @@ -75,7 +75,7 @@ define i1 @scalar_i32_signbit_shl_and_ne(i32 %x, i32 %y) { define <4 x i1> @vec_4xi32_signbit_shl_and_eq(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @vec_4xi32_signbit_shl_and_eq( -; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> , [[Y:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> splat (i32 -2147483648), [[Y:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHL]], [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/signed-mul-lack-of-overflow-check-via-mul-sdiv.ll b/llvm/test/Transforms/InstCombine/signed-mul-lack-of-overflow-check-via-mul-sdiv.ll index 69f50738af6210..8ab79494f962c3 100644 --- a/llvm/test/Transforms/InstCombine/signed-mul-lack-of-overflow-check-via-mul-sdiv.ll +++ b/llvm/test/Transforms/InstCombine/signed-mul-lack-of-overflow-check-via-mul-sdiv.ll @@ -23,7 +23,7 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( ; CHECK-NEXT: [[MUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.smul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[MUL]], 1 -; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], +; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[MUL_NOT_OV]] ; %t0 = mul <2 x i8> %x, %y diff --git a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll index 513fb69ab7463e..8015af59f4f819 100644 --- a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll +++ b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll @@ -173,7 +173,7 @@ define i1 @positive_with_extra_and_logical(i32 %arg, i1 %z) { define <2 x i1> @positive_vec_splat(<2 x i32> %arg) { ; CHECK-LABEL: @positive_vec_splat( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <2 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <2 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <2 x i32> %arg, @@ -185,7 +185,7 @@ define <2 x i1> @positive_vec_splat(<2 x i32> %arg) { define <2 x i1> @positive_vec_nonsplat(<2 x i32> %arg) { ; CHECK-LABEL: @positive_vec_nonsplat( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[ARG:%.*]], splat (i32 -1) ; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[ARG]], ; CHECK-NEXT: [[T3:%.*]] = icmp ult <2 x i32> [[T2]], ; CHECK-NEXT: [[T4:%.*]] = and <2 x i1> [[T1]], [[T3]] @@ -200,7 +200,7 @@ define <2 x i1> @positive_vec_nonsplat(<2 x i32> %arg) { define <3 x i1> @positive_vec_poison0(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison0( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -212,7 +212,7 @@ define <3 x i1> @positive_vec_poison0(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison1(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison1( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -224,7 +224,7 @@ define <3 x i1> @positive_vec_poison1(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison2(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison2( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -236,7 +236,7 @@ define <3 x i1> @positive_vec_poison2(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison3(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison3( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -248,7 +248,7 @@ define <3 x i1> @positive_vec_poison3(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison4(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison4( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -260,7 +260,7 @@ define <3 x i1> @positive_vec_poison4(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison5(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison5( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, @@ -272,7 +272,7 @@ define <3 x i1> @positive_vec_poison5(<3 x i32> %arg) { define <3 x i1> @positive_vec_poison6(<3 x i32> %arg) { ; CHECK-LABEL: @positive_vec_poison6( -; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128) ; CHECK-NEXT: ret <3 x i1> [[T4_SIMPLIFIED]] ; %t1 = icmp sgt <3 x i32> %arg, diff --git a/llvm/test/Transforms/InstCombine/signext.ll b/llvm/test/Transforms/InstCombine/signext.ll index 765a0470c2e483..f72cf29d2fcc7b 100644 --- a/llvm/test/Transforms/InstCombine/signext.ll +++ b/llvm/test/Transforms/InstCombine/signext.ll @@ -34,8 +34,8 @@ define i32 @sextinreg_extra_use(i32 %x) { define <2 x i32> @sextinreg_splat(<2 x i32> %x) { ; CHECK-LABEL: @sextinreg_splat( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], +; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 16) +; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t1 = and <2 x i32> %x, @@ -58,8 +58,8 @@ define i32 @sextinreg_alt(i32 %x) { define <2 x i32> @sextinreg_alt_splat(<2 x i32> %x) { ; CHECK-LABEL: @sextinreg_alt_splat( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], +; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 16) +; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t1 = and <2 x i32> %x, @@ -119,8 +119,8 @@ define i32 @sextinreg2(i32 %x) { define <2 x i32> @sextinreg2_splat(<2 x i32> %x) { ; CHECK-LABEL: @sextinreg2_splat( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], +; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 24) +; CHECK-NEXT: [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], splat (i32 24) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t1 = and <2 x i32> %x, @@ -181,7 +181,7 @@ define i32 @ashr(i32 %x) { define <2 x i32> @ashr_splat(<2 x i32> %x) { ; CHECK-LABEL: @ashr_splat( -; CHECK-NEXT: [[SUB:%.*]] = ashr <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SUB:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 5) ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; %shr = lshr <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll b/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll index e7505721cad604..b82e33cd2da83e 100644 --- a/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll +++ b/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll @@ -121,7 +121,7 @@ define i32 @n7(i16 %x) { define <2 x i32> @t8(<2 x i16> %x) { ; CHECK-LABEL: @t8( ; CHECK-NEXT: [[X_SIGNEXT:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %i0 = zext <2 x i16> %x to <2 x i32> @@ -133,7 +133,7 @@ define <2 x i32> @t8(<2 x i16> %x) { define <2 x i32> @t9(<2 x i16> %x) { ; CHECK-LABEL: @t9( ; CHECK-NEXT: [[I1:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[I1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[I1]], splat (i32 -2147483648) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %i0 = zext <2 x i16> %x to <2 x i32> @@ -146,7 +146,7 @@ define <2 x i32> @t9(<2 x i16> %x) { define <2 x i32> @t10_undef(<2 x i16> %x) { ; CHECK-LABEL: @t10_undef( ; CHECK-NEXT: [[I0:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[I1:%.*]] = shl nuw <2 x i32> [[I0]], +; CHECK-NEXT: [[I1:%.*]] = shl nuw <2 x i32> [[I0]], splat (i32 16) ; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[I1]], ; CHECK-NEXT: ret <2 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll index 65faf0974b503f..ad88287f1d99e4 100644 --- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll +++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll @@ -158,12 +158,14 @@ entry: define nofpclass(pinf) { <2 x float> } @ret_nofpclass_vector_elems_struct_ty_pinf__ninf() { ; CHECK-LABEL: define nofpclass(pinf) { <2 x float> } @ret_nofpclass_vector_elems_struct_ty_pinf__ninf() { ; CHECK-NEXT: entry: -; CHECK-NEXT: ret { <2 x float> } { <2 x float> } +; CHECK-NEXT: ret { <2 x float> } { <2 x float> splat (float 0xFFF0000000000000) } ; entry: ret { <2 x float>} { <2 x float> } } +; UTC_ARGS: --disable +; FileCheck does not like the nested square brackets. define nofpclass(pinf) [ 1 x [ 1 x float ]] @ret_nofpclass_nested_array_ty_pinf__ninf() { ; CHECK-LABEL: @ret_nofpclass_nested_array_ty_pinf__ninf() { ; CHECK-NEXT: entry: @@ -172,6 +174,7 @@ define nofpclass(pinf) [ 1 x [ 1 x float ]] @ret_nofpclass_nested_array_ty_pinf_ entry: ret [ 1 x [ 1 x float ]] [[ 1 x float ] [float 0xFFF0000000000000]] } +; UTC_ARGS: --enable define nofpclass(pzero) { float, float } @ret_nofpclass_multiple_elems_struct_ty_pzero__nzero() { ; CHECK-LABEL: define nofpclass(pzero) { float, float } @ret_nofpclass_multiple_elems_struct_ty_pzero__nzero() { diff --git a/llvm/test/Transforms/InstCombine/sitofp.ll b/llvm/test/Transforms/InstCombine/sitofp.ll index 51eff39cd900e2..71725ff2f3d0e4 100644 --- a/llvm/test/Transforms/InstCombine/sitofp.ll +++ b/llvm/test/Transforms/InstCombine/sitofp.ll @@ -381,7 +381,7 @@ define i12 @u32_half_u12(i32 %x) { define <2 x i1> @i8_vec_sitofp_test1(<2 x i8> %A) { ; CHECK-LABEL: @i8_vec_sitofp_test1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %B = sitofp <2 x i8> %A to <2 x double> %C = fcmp ult <2 x double> %B, @@ -390,7 +390,7 @@ define <2 x i1> @i8_vec_sitofp_test1(<2 x i8> %A) { define <2 x i1> @i8_vec_sitofp_test2(<2 x i8> %A) { ; CHECK-LABEL: @i8_vec_sitofp_test2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %B = sitofp <2 x i8> %A to <2 x double> %C = fcmp ugt <2 x double> %B, @@ -399,7 +399,7 @@ define <2 x i1> @i8_vec_sitofp_test2(<2 x i8> %A) { define <2 x i1> @i8_vec_sitofp_test3(<2 x i8> %A) { ; CHECK-LABEL: @i8_vec_sitofp_test3( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %B = sitofp <2 x i8> %A to <2 x double> %C = fcmp ule <2 x double> %B, @@ -408,7 +408,7 @@ define <2 x i1> @i8_vec_sitofp_test3(<2 x i8> %A) { define <2 x i1> @i8_vec_sitofp_test4(<2 x i8> %A) { ; CHECK-LABEL: @i8_vec_sitofp_test4( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %B = sitofp <2 x i8> %A to <2 x double> diff --git a/llvm/test/Transforms/InstCombine/smin-icmp.ll b/llvm/test/Transforms/InstCombine/smin-icmp.ll index d1283d8afc0a74..f05c1698f1a980 100644 --- a/llvm/test/Transforms/InstCombine/smin-icmp.ll +++ b/llvm/test/Transforms/InstCombine/smin-icmp.ll @@ -911,7 +911,7 @@ define void @eq_smin_v2i32(<2 x i32> %x, <2 x i32> %y) { ; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], [[X]] ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) ; CHECK-NEXT: [[CMP4:%.*]] = icmp sge <2 x i32> [[Y]], [[X]] ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) @@ -956,24 +956,24 @@ define void @eq_smin_v2i32(<2 x i32> %x, <2 x i32> %y) { ; icmp pred smin(C1, Y), C2 where C1 == C2 define void @eq_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @eq_smin_v2i32_constant( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> splat (i32 10)) +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; @@ -1004,21 +1004,21 @@ define void @eq_smin_v2i32_constant(<2 x i32> %y) { ; icmp pred smin(C1, Y), C2 where C1 < C2 define void @slt_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @slt_smin_v2i32_constant( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> splat (i32 5)) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; %cond = call <2 x i32> @llvm.smin.v2i32(<2 x i32> , <2 x i32> %y) @@ -1049,23 +1049,23 @@ define void @slt_smin_v2i32_constant(<2 x i32> %y) { define void @sle_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @sle_smin_v2i32_constant( ; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: call void @use_v2i1(<2 x i1> ) +; CHECK-NEXT: call void @use_v2i1(<2 x i1> splat (i1 true)) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> zeroinitializer) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; @@ -1096,26 +1096,26 @@ define void @sle_smin_v2i32_constant(<2 x i32> %y) { ; icmp pred smin(C1, Y), C2 where C1 > C2 define void @sgt_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @sgt_smin_v2i32_constant( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> splat (i32 15)) +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP2]]) -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP3]]) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; @@ -1147,25 +1147,25 @@ define void @sgt_smin_v2i32_constant(<2 x i32> %y) { define void @sge_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @sge_smin_v2i32_constant( ; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP2]]) -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP3]]) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; @@ -1197,25 +1197,25 @@ define void @sge_smin_v2i32_constant(<2 x i32> %y) { define void @unknown_smin_v2i32_constant(<2 x i32> %y) { ; CHECK-LABEL: @unknown_smin_v2i32_constant( ; CHECK-NEXT: [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> ) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP2]]) -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP3]]) -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP4]]) -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP5]]) -; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP6]]) -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP7]]) -; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP8]]) -; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP9]]) -; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], +; CHECK-NEXT: [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], splat (i32 10) ; CHECK-NEXT: call void @use_v2i1(<2 x i1> [[CMP10]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll index f72fe5a6a5817b..0f4db3b3a65ae7 100644 --- a/llvm/test/Transforms/InstCombine/sqrt.ll +++ b/llvm/test/Transforms/InstCombine/sqrt.ll @@ -192,7 +192,7 @@ define double @sqrt_exp_intr_and_libcall_2(double %x) { define <2 x float> @sqrt_exp_vec(<2 x float> %x) { ; CHECK-LABEL: @sqrt_exp_vec( -; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], +; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], splat (float 5.000000e-01) ; CHECK-NEXT: [[E:%.*]] = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> [[MERGED_SQRT]]) ; CHECK-NEXT: ret <2 x float> [[E]] ; diff --git a/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll b/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll index 70fd4eefe74f5d..3607b191664963 100644 --- a/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll +++ b/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll @@ -53,7 +53,7 @@ define { i8, i1 } @no_fold_on_constant_sub_overflow(i8 %x) { define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_constant( -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 -42)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = sub nsw <2 x i32> %x, @@ -64,7 +64,7 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { ; CHECK-LABEL: @no_fold_splat_undef_constant( ; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 -30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = sub nsw <2 x i32> %x, @@ -75,7 +75,7 @@ define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @no_fold_splat_not_constant( ; CHECK-NEXT: [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 -30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = sub nsw <2 x i32> %x, %y diff --git a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll index e21ca605fc5af5..9774f6db555440 100644 --- a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll +++ b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll @@ -130,7 +130,7 @@ define i32 @neg_or_ashr_i32_commute(i32 %x0) { define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @sub_ashr_or_i32_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[X]] +; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %sub = sub nsw <4 x i32> %y, %x @@ -142,7 +142,7 @@ define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @sub_ashr_or_i32_vec_nuw_nsw( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[X]] +; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %sub = sub nuw nsw <4 x i32> %y, %x @@ -166,7 +166,7 @@ define <4 x i32> @neg_or_ashr_i32_vec(<4 x i32> %x) { define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @sub_ashr_or_i32_vec_commute( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[X]] +; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %sub = sub nsw <4 x i32> %y, %x @@ -177,7 +177,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @neg_or_ashr_i32_vec_commute(<4 x i32> %x0) { ; CHECK-LABEL: @neg_or_ashr_i32_vec_commute( -; CHECK-NEXT: [[X:%.*]] = sdiv <4 x i32> , [[X0:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <4 x i32> splat (i32 42), [[X0:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[X]], zeroinitializer ; CHECK-NEXT: [[SHR:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHR]] @@ -283,7 +283,7 @@ define i32 @neg_or_extra_use_ashr_i32(i32 %x, ptr %p) { define <4 x i32> @sub_ashr_or_i32_vec_undef1(<4 x i32> %x) { ; CHECK-LABEL: @sub_ashr_or_i32_vec_undef1( ; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> , [[X:%.*]] -; CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i32> [[SUB]], +; CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i32> [[SUB]], splat (i32 31) ; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[SHR]], [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] ; @@ -295,7 +295,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_undef1(<4 x i32> %x) { define <4 x i32> @sub_ashr_or_i32_vec_undef2(<4 x i32> %x) { ; CHECK-LABEL: @sub_ashr_or_i32_vec_undef2( -; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> , [[X:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> splat (i32 255), [[X:%.*]] ; CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i32> [[SUB]], ; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[SHR]], [[X]] ; CHECK-NEXT: ret <4 x i32> [[OR]] diff --git a/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll index 33c02d77c45b90..1010aab7cbb0b0 100644 --- a/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll +++ b/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll @@ -47,7 +47,7 @@ define <4 x i32> @neg_or_lshr_i32_vec(<4 x i32> %x) { define <4 x i32> @neg_or_lshr_i32_vec_commute(<4 x i32> %x0) { ; CHECK-LABEL: @neg_or_lshr_i32_vec_commute( -; CHECK-NEXT: [[X:%.*]] = sdiv <4 x i32> , [[X0:%.*]] +; CHECK-NEXT: [[X:%.*]] = sdiv <4 x i32> splat (i32 42), [[X0:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[X]], zeroinitializer ; CHECK-NEXT: [[SHR:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHR]] diff --git a/llvm/test/Transforms/InstCombine/sub-not.ll b/llvm/test/Transforms/InstCombine/sub-not.ll index 5053319162f0d2..9fa55a6896126a 100644 --- a/llvm/test/Transforms/InstCombine/sub-not.ll +++ b/llvm/test/Transforms/InstCombine/sub-not.ll @@ -29,7 +29,7 @@ define i8 @sub_not_extra_use(i8 %x, i8 %y) { define <2 x i8> @sub_not_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_not_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[Y:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -64,7 +64,7 @@ define i8 @dec_sub_extra_use(i8 %x, i8 %y) { define <2 x i8> @dec_sub_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @dec_sub_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -99,7 +99,7 @@ define i8 @sub_inc_extra_use(i8 %x, i8 %y) { define <2 x i8> @sub_inc_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_inc_vec( -; CHECK-NEXT: [[S_NEG:%.*]] = xor <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[S_NEG:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[Y:%.*]], [[S_NEG]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -134,7 +134,7 @@ define i8 @sub_dec_extra_use(i8 %x, i8 %y) { define <2 x i8> @sub_dec_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @sub_dec_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll index 60607041ad2f90..3bbb9b931e4331 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll @@ -572,7 +572,7 @@ define i4 @negate_xor(i4 %x) { define <2 x i4> @negate_xor_vec(<2 x i4> %x) { ; CHECK-LABEL: @negate_xor_vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[TMP1]], +; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[TMP1]], splat (i4 1) ; CHECK-NEXT: ret <2 x i4> [[O_NEG]] ; %o = xor <2 x i4> %x, @@ -623,8 +623,8 @@ define i8 @negate_shl_not_uses(i8 %x, i8 %y) { define <2 x i4> @negate_mul_not_uses_vec(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @negate_mul_not_uses_vec( -; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[O:%.*]] = xor <2 x i4> [[X]], +; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[X:%.*]], splat (i4 1) +; CHECK-NEXT: [[O:%.*]] = xor <2 x i4> [[X]], splat (i4 -1) ; CHECK-NEXT: call void @use_v2i4(<2 x i4> [[O]]) ; CHECK-NEXT: [[S_NEG:%.*]] = mul <2 x i4> [[O_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i4> [[S_NEG]] diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll index b19eae4d8f9a41..bffdc6d6cd497b 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll @@ -596,7 +596,7 @@ define i4 @negate_xor(i4 %x) { define <2 x i4> @negate_xor_vec(<2 x i4> %x) { ; CHECK-LABEL: @negate_xor_vec( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[TMP1]], +; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[TMP1]], splat (i4 1) ; CHECK-NEXT: ret <2 x i4> [[O_NEG]] ; %o = xor <2 x i4> %x, @@ -647,8 +647,8 @@ define i8 @negate_shl_not_uses(i8 %x, i8 %y) { define <2 x i4> @negate_mul_not_uses_vec(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @negate_mul_not_uses_vec( -; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[O:%.*]] = xor <2 x i4> [[X]], +; CHECK-NEXT: [[O_NEG:%.*]] = add <2 x i4> [[X:%.*]], splat (i4 1) +; CHECK-NEXT: [[O:%.*]] = xor <2 x i4> [[X]], splat (i4 -1) ; CHECK-NEXT: call void @use_v2i4(<2 x i4> [[O]]) ; CHECK-NEXT: [[S_NEG:%.*]] = mul <2 x i4> [[O_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i4> [[S_NEG]] @@ -1458,8 +1458,8 @@ if.end: define <1 x i64> @PR56601(<1 x i64> %x, <1 x i64> %y) { ; CHECK-LABEL: @PR56601( -; CHECK-NEXT: [[M1:%.*]] = mul nsw <1 x i64> [[X:%.*]], -; CHECK-NEXT: [[M2:%.*]] = mul nsw <1 x i64> [[Y:%.*]], +; CHECK-NEXT: [[M1:%.*]] = mul nsw <1 x i64> [[X:%.*]], splat (i64 42) +; CHECK-NEXT: [[M2:%.*]] = mul nsw <1 x i64> [[Y:%.*]], splat (i64 12) ; CHECK-NEXT: [[A1:%.*]] = add <1 x i64> [[M1]], ; CHECK-NEXT: [[A2:%.*]] = add <1 x i64> [[M2]], ; CHECK-NEXT: [[R:%.*]] = sub <1 x i64> [[A1]], [[A2]] diff --git a/llvm/test/Transforms/InstCombine/sub-xor.ll b/llvm/test/Transforms/InstCombine/sub-xor.ll index b4add9698b1609..a4135e0b514532 100644 --- a/llvm/test/Transforms/InstCombine/sub-xor.ll +++ b/llvm/test/Transforms/InstCombine/sub-xor.ll @@ -16,8 +16,8 @@ define i32 @low_mask_nsw_nuw(i32 %x) { define <2 x i32> @low_mask_nsw_nuw_vec(<2 x i32> %x) { ; CHECK-LABEL: @low_mask_nsw_nuw_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i32> [[AND]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 31) +; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i32> [[AND]], splat (i32 63) ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; %and = and <2 x i32> %x, @@ -73,8 +73,8 @@ define i8 @arbitrary_mask_sub_nuw_high_bit_dont_care_i8(i8 %x) { define <2 x i5> @arbitrary_mask_sub_v2i5(<2 x i5> %x) { ; CHECK-LABEL: @arbitrary_mask_sub_v2i5( -; CHECK-NEXT: [[A:%.*]] = and <2 x i5> [[X:%.*]], -; CHECK-NEXT: [[M:%.*]] = sub nuw nsw <2 x i5> , [[A]] +; CHECK-NEXT: [[A:%.*]] = and <2 x i5> [[X:%.*]], splat (i5 -8) +; CHECK-NEXT: [[M:%.*]] = sub nuw nsw <2 x i5> splat (i5 -6), [[A]] ; CHECK-NEXT: ret <2 x i5> [[M]] ; %a = and <2 x i5> %x, ; 0b11000 @@ -135,8 +135,8 @@ define i32 @xor_add_extra_use(i32 %x) { define <2 x i8> @xor_add_splat(<2 x i8> %x) { ; CHECK-LABEL: @xor_add_splat( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = sub nuw nsw <2 x i8> , [[AND]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 24) +; CHECK-NEXT: [[ADD:%.*]] = sub nuw nsw <2 x i8> splat (i8 105), [[AND]] ; CHECK-NEXT: ret <2 x i8> [[ADD]] ; %and = and <2 x i8> %x, @@ -147,9 +147,9 @@ define <2 x i8> @xor_add_splat(<2 x i8> %x) { define <2 x i8> @xor_add_splat_undef(<2 x i8> %x) { ; CHECK-LABEL: @xor_add_splat_undef( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 24) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[AND]], -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[XOR]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[XOR]], splat (i8 42) ; CHECK-NEXT: ret <2 x i8> [[ADD]] ; %and = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index ff3f046607ec3a..b1313007d509fc 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -320,7 +320,7 @@ define i32 @test13(i32 %A) { define <2 x i32> @test12vec(<2 x i32> %A) { ; CHECK-LABEL: @test12vec( -; CHECK-NEXT: [[B_NEG:%.*]] = lshr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B_NEG:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[B_NEG]] ; %B = ashr <2 x i32> %A, @@ -330,7 +330,7 @@ define <2 x i32> @test12vec(<2 x i32> %A) { define <2 x i32> @test13vec(<2 x i32> %A) { ; CHECK-LABEL: @test13vec( -; CHECK-NEXT: [[B_NEG:%.*]] = ashr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B_NEG:%.*]] = ashr <2 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[B_NEG]] ; %B = lshr <2 x i32> %A, @@ -619,7 +619,7 @@ define <2 x i32> @test27vec(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @test27vecsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test27vecsplat( -; CHECK-NEXT: [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 3) ; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[MUL_NEG]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; @@ -663,7 +663,7 @@ define <2 x i32> @test27commutedvec(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @test27commutedvecsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test27commutedvecsplat( -; CHECK-NEXT: [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 3) ; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[MUL_NEG]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; @@ -749,7 +749,7 @@ define <2 x i64> @test36(<2 x i64> %A) { define <2 x i32> @test37(<2 x i32> %A) { ; CHECK-LABEL: @test37( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 -2147483648) ; CHECK-NEXT: [[DIV_NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[DIV_NEG]] ; @@ -832,7 +832,7 @@ define i32 @test44(i32 %x) { define <2 x i32> @test44vec(<2 x i32> %x) { ; CHECK-LABEL: @test44vec( -; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i32> [[X:%.*]], splat (i32 -32768) ; CHECK-NEXT: ret <2 x i32> [[SUB]] ; %sub = sub nsw <2 x i32> %x, @@ -850,7 +850,7 @@ define @test44scalablevec( %x) { define <2 x i16> @test44vecminval(<2 x i16> %x) { ; CHECK-LABEL: @test44vecminval( -; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i16> [[X:%.*]], splat (i16 -32768) ; CHECK-NEXT: ret <2 x i16> [[SUB]] ; %sub = sub nsw <2 x i16> %x, @@ -1001,7 +1001,7 @@ define i32 @test54(i1 %C) { define <2 x i32> @test54vec(i1 %C) { ; CHECK-LABEL: @test54vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 -877), <2 x i32> splat (i32 113) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -1048,7 +1048,7 @@ define <2 x i32> @test55vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A_NEG:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A_NEG:%.*]] = phi <2 x i32> [ splat (i32 -877), [[ENTRY:%.*]] ], [ splat (i32 113), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A_NEG]] ; entry: @@ -1228,8 +1228,8 @@ define i32 @test62(i32 %A) { define <2 x i32> @test62vec(<2 x i32> %A) { ; CHECK-LABEL: @test62vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = sub <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 1) +; CHECK-NEXT: [[C:%.*]] = sub <2 x i32> splat (i32 2), [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = sub <2 x i32> , %A @@ -1250,7 +1250,7 @@ define i32 @test63(i32 %A) { define <2 x i32> @test63vec(<2 x i32> %A) { ; CHECK-LABEL: @test63vec( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %B = sub <2 x i32> , %A @@ -1314,8 +1314,8 @@ define i32 @test67(i32 %x) { ; Check splat vectors too define <2 x i32> @test68(<2 x i32> %x) { ; CHECK-LABEL: @test68( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) -; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 255)) +; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[DOTNEG]] ; %1 = xor <2 x i32> %x, @@ -1329,7 +1329,7 @@ define <2 x i32> @test68(<2 x i32> %x) { define <2 x i32> @test69(<2 x i32> %x) { ; CHECK-LABEL: @test69( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) -; CHECK-NEXT: [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], +; CHECK-NEXT: [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[DOTNEG]] ; %1 = xor <2 x i32> %x, @@ -1367,7 +1367,7 @@ define i32 @test71(i32 %A, i32 %B) { ; Check (X | Y) - Y --> X & ~Y where X and Y are vectors define <2 x i32> @test72(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test72( -; CHECK-NEXT: [[B_NOT:%.*]] = xor <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[B_NOT:%.*]] = xor <2 x i32> [[B:%.*]], splat (i32 -1) ; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[A:%.*]], [[B_NOT]] ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -1552,10 +1552,10 @@ define i8 @sub_not_mask_lowbits(i8 %x) { define <2 x i8> @sub_mask_lowbits_splat_extra_use(<2 x i8> %x, ptr %p) { ; CHECK-LABEL: @sub_mask_lowbits_splat_extra_use( -; CHECK-NEXT: [[A2:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A2:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 10) ; CHECK-NEXT: store <2 x i8> [[A2]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -11) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], splat (i8 -64) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a1 = add <2 x i8> %x, ; 0xc0 @@ -2056,7 +2056,7 @@ define i8 @mul_sub_common_factor_commute1(i8 %x, i8 %y) { define <2 x i8> @mul_sub_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @mul_sub_common_factor_commute2( -; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[A:%.*]] = mul <2 x i8> [[M1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[A]] ; @@ -2204,9 +2204,9 @@ define i8 @shrink_sub_from_constant_lowbits2(i8 %x) { define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) { ; CHECK-LABEL: @shrink_sub_from_constant_lowbits3( -; CHECK-NEXT: [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X0000]] -; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i8> [[SUB]], +; CHECK-NEXT: [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 4) +; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 24), [[X0000]] +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i8> [[SUB]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x0000 = shl <2 x i8> %x, ; 4 low bits are known zero @@ -2373,8 +2373,8 @@ define i10 @sub_to_and_negative4(i10 %x) { define <2 x i8> @sub_to_and_vector1(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector1( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 120) +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], splat (i8 -9) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> , %x @@ -2387,8 +2387,8 @@ define <2 x i8> @sub_to_and_vector1(<2 x i8> %x) { define <2 x i8> @sub_to_and_vector2(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector2( ; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], -; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> , [[AND]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], splat (i8 120) +; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> splat (i8 77), [[AND]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> , %x @@ -2400,9 +2400,9 @@ define <2 x i8> @sub_to_and_vector2(<2 x i8> %x) { define <2 x i8> @sub_to_and_vector3(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector3( -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 71), [[X:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], -; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> , [[AND]] +; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> splat (i8 44), [[AND]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> , %x @@ -2414,8 +2414,8 @@ define <2 x i8> @sub_to_and_vector3(<2 x i8> %x) { define <2 x i8> @sub_to_and_vector4(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector4( -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], +; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 71), [[X:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], splat (i8 120) ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> , [[AND]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll index 4593730b8809f4..6a9b31f3c040e6 100644 --- a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll +++ b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll @@ -401,7 +401,7 @@ define <8 x i8> @narrow_zext_ashr_keep_trunc_vector(<8 x i8> %i1, <8 x i8> %i2) ; CHECK-NEXT: [[I1_EXT:%.*]] = sext <8 x i8> [[I1]] to <8 x i32> ; CHECK-NEXT: [[I2_EXT:%.*]] = sext <8 x i8> [[I2]] to <8 x i32> ; CHECK-NEXT: [[SUB:%.*]] = add nsw <8 x i32> [[I1_EXT]], [[I2_EXT]] -; CHECK-NEXT: [[SHIFT:%.*]] = lshr <8 x i32> [[SUB]], +; CHECK-NEXT: [[SHIFT:%.*]] = lshr <8 x i32> [[SUB]], splat (i32 1) ; CHECK-NEXT: [[T:%.*]] = trunc <8 x i32> [[SHIFT]] to <8 x i8> ; CHECK-NEXT: ret <8 x i8> [[T]] ; diff --git a/llvm/test/Transforms/InstCombine/trunc-demand.ll b/llvm/test/Transforms/InstCombine/trunc-demand.ll index 9d7bf589268e2b..aa8021c059013d 100644 --- a/llvm/test/Transforms/InstCombine/trunc-demand.ll +++ b/llvm/test/Transforms/InstCombine/trunc-demand.ll @@ -85,8 +85,8 @@ define i6 @trunc_lshr_use2(i8 %x) { define <2 x i7> @trunc_lshr_vec_splat(<2 x i16> %x) { ; CHECK-LABEL: @trunc_lshr_vec_splat( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i7> -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], -; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], splat (i7 5) +; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[TMP2]], splat (i7 1) ; CHECK-NEXT: ret <2 x i7> [[R]] ; %s = lshr <2 x i16> %x, @@ -100,7 +100,7 @@ define <2 x i7> @trunc_lshr_vec_splat(<2 x i16> %x) { define <2 x i7> @trunc_lshr_vec_splat_exact_mask(<2 x i16> %x) { ; CHECK-LABEL: @trunc_lshr_vec_splat_exact_mask( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i7> -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], splat (i7 6) ; CHECK-NEXT: ret <2 x i7> [[TMP2]] ; %s = lshr <2 x i16> %x, @@ -113,9 +113,9 @@ define <2 x i7> @trunc_lshr_vec_splat_exact_mask(<2 x i16> %x) { define <2 x i7> @trunc_lshr_big_shift(<2 x i16> %x) { ; CHECK-LABEL: @trunc_lshr_big_shift( -; CHECK-NEXT: [[S:%.*]] = lshr <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[S:%.*]] = lshr <2 x i16> [[X:%.*]], splat (i16 7) ; CHECK-NEXT: [[T:%.*]] = trunc <2 x i16> [[S]] to <2 x i7> -; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[T]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i7> [[T]], splat (i7 1) ; CHECK-NEXT: ret <2 x i7> [[R]] ; %s = lshr <2 x i16> %x, diff --git a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll index 063006ba5eea8b..33fa2c375f1eca 100644 --- a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll @@ -24,7 +24,7 @@ define i64 @test1(i64 %a) { define <2 x i64> @test1_vec(<2 x i64> %a) { ; CHECK-LABEL: @test1_vec( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], +; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], splat (i64 15) ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; @@ -82,8 +82,8 @@ define i64 @test2(i64 %a) { define <2 x i64> @test2_vec(<2 x i64> %a) { ; CHECK-LABEL: @test2_vec( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], splat (i64 36) +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], splat (i64 36) ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; @@ -229,8 +229,8 @@ define i32 @trunc_ashr(i32 %X) { define <2 x i32> @trunc_ashr_vec(<2 x i32> %X) { ; CHECK-LABEL: @trunc_ashr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8) +; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[C]] ; %A = zext <2 x i32> %X to <2 x i36> @@ -272,7 +272,7 @@ define <2 x i64> @test8_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test8_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], +; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], splat (i64 32) ; CHECK-NEXT: [[F:%.*]] = or disjoint <2 x i64> [[E]], [[C]] ; CHECK-NEXT: ret <2 x i64> [[F]] ; @@ -358,7 +358,7 @@ define i64 @test11(i32 %A, i32 %B) { define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test11_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -422,7 +422,7 @@ define i64 @test12(i32 %A, i32 %B) { define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test12_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = lshr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -486,7 +486,7 @@ define i64 @test13(i32 %A, i32 %B) { define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test13_vec( ; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = ashr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -704,7 +704,7 @@ define i32 @trunc_shl_32_i32_i64(i64 %val) { define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) { ; CHECK-LABEL: @trunc_shl_16_v2i32_v2i64( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %shl = shl <2 x i64> %val, @@ -754,7 +754,7 @@ define i32 @trunc_shl_lshr_infloop(i64 %arg) { define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) { ; CHECK-LABEL: @trunc_shl_v2i32_v2i64_uniform( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %shl = shl <2 x i64> %val, @@ -862,7 +862,7 @@ define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) { define <8 x i16> @trunc_shl_v8i15_v8i32_15(<8 x i32> %a) { ; CHECK-LABEL: @trunc_shl_v8i15_v8i32_15( ; CHECK-NEXT: [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], +; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[CONV]] ; %shl = shl <8 x i32> %a, @@ -891,7 +891,7 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_17(<8 x i32> %a) { define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) { ; CHECK-LABEL: @trunc_shl_v8i16_v8i32_4( ; CHECK-NEXT: [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], +; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 4) ; CHECK-NEXT: ret <8 x i16> [[CONV]] ; %shl = shl <8 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll index c50a3d06d24b9c..69ccca562e885c 100644 --- a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll @@ -34,7 +34,7 @@ define <2 x i8> @trunc_shl_trunc(<2 x i64> %a) { define <2 x i8> @trunc_lshr_trunc_uniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_lshr_trunc_uniform( -; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 8) ; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; @@ -120,7 +120,7 @@ define i8 @trunc_ashr_trunc_exact(i64 %a) { define <2 x i8> @trunc_ashr_trunc_uniform(<2 x i64> %a) { ; CHECK-LABEL: @trunc_ashr_trunc_uniform( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 8) ; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll index 0956fc655ec9a7..15533346b32ece 100644 --- a/llvm/test/Transforms/InstCombine/trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc.ll @@ -25,7 +25,7 @@ define i64 @test1(i64 %a) { define <2 x i64> @test1_vec(<2 x i64> %a) { ; CHECK-LABEL: @test1_vec( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], +; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], splat (i64 15) ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; @@ -83,8 +83,8 @@ define i64 @test2(i64 %a) { define <2 x i64> @test2_vec(<2 x i64> %a) { ; CHECK-LABEL: @test2_vec( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], splat (i64 36) +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], splat (i64 36) ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; @@ -230,8 +230,8 @@ define i32 @trunc_ashr(i32 %X) { define <2 x i32> @trunc_ashr_vec(<2 x i32> %X) { ; CHECK-LABEL: @trunc_ashr_vec( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], splat (i32 -8388608) ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %A = zext <2 x i32> %X to <2 x i36> @@ -273,7 +273,7 @@ define <2 x i64> @test8_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test8_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], +; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], splat (i64 32) ; CHECK-NEXT: [[F:%.*]] = or disjoint <2 x i64> [[E]], [[C]] ; CHECK-NEXT: ret <2 x i64> [[F]] ; @@ -359,7 +359,7 @@ define i64 @test11(i32 %A, i32 %B) { define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test11_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -423,7 +423,7 @@ define i64 @test12(i32 %A, i32 %B) { define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test12_vec( ; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = lshr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -487,7 +487,7 @@ define i64 @test13(i32 %A, i32 %B) { define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: @test13_vec( ; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31) ; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[F:%.*]] = ashr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[F]] @@ -705,7 +705,7 @@ define i32 @trunc_shl_32_i32_i64(i64 %val) { define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) { ; CHECK-LABEL: @trunc_shl_16_v2i32_v2i64( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 16) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %shl = shl <2 x i64> %val, @@ -755,7 +755,7 @@ define i32 @trunc_shl_lshr_infloop(i64 %arg) { define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) { ; CHECK-LABEL: @trunc_shl_v2i32_v2i64_uniform( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; %shl = shl <2 x i64> %val, @@ -863,7 +863,7 @@ define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) { define <8 x i16> @trunc_shl_v8i15_v8i32_15(<8 x i32> %a) { ; CHECK-LABEL: @trunc_shl_v8i15_v8i32_15( ; CHECK-NEXT: [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], +; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 15) ; CHECK-NEXT: ret <8 x i16> [[CONV]] ; %shl = shl <8 x i32> %a, @@ -892,7 +892,7 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_17(<8 x i32> %a) { define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) { ; CHECK-LABEL: @trunc_shl_v8i16_v8i32_4( ; CHECK-NEXT: [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], +; CHECK-NEXT: [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 4) ; CHECK-NEXT: ret <8 x i16> [[CONV]] ; %shl = shl <8 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/truncating-saturate.ll b/llvm/test/Transforms/InstCombine/truncating-saturate.ll index c0111528e2a4d8..44bba283c66862 100644 --- a/llvm/test/Transforms/InstCombine/truncating-saturate.ll +++ b/llvm/test/Transforms/InstCombine/truncating-saturate.ll @@ -62,8 +62,8 @@ define i16 @testi32i16i8(i32 %add) { define <4 x i16> @testv4i32i16i8(<4 x i32> %add) { ; CHECK-LABEL: @testv4i32i16i8( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD:%.*]], <4 x i32> ) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD:%.*]], <4 x i32> splat (i32 -128)) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 127)) ; CHECK-NEXT: [[R:%.*]] = trunc nsw <4 x i32> [[TMP2]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[R]] ; @@ -147,8 +147,8 @@ define i32 @testi64i32addsat(i32 %a, i32 %b) { define <4 x i8> @testv4i16i8(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[ADD:%.*]], <4 x i16> ) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[ADD:%.*]], <4 x i16> splat (i16 -128)) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 127)) ; CHECK-NEXT: [[COND_I:%.*]] = trunc nsw <4 x i16> [[TMP2]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[COND_I]] ; @@ -567,7 +567,7 @@ define i8 @C0zero(i8 %X, i8 %y, i8 %z) { define <2 x i8> @C0zeroV(<2 x i8> %X, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @C0zeroV( -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 -10) ; CHECK-NEXT: [[F:%.*]] = select <2 x i1> [[C]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]] ; CHECK-NEXT: ret <2 x i8> [[F]] ; @@ -581,9 +581,9 @@ define <2 x i8> @C0zeroV(<2 x i8> %X, <2 x i8> %y, <2 x i8> %z) { define <2 x i8> @C0zeroVu(<2 x i8> %X, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @C0zeroVu( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 10) ; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[A]], -; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X]], +; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X]], splat (i8 -10) ; CHECK-NEXT: [[F:%.*]] = select <2 x i1> [[C]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[X]], <2 x i8> [[F]] ; CHECK-NEXT: ret <2 x i8> [[R]] diff --git a/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll index fd5d38bb38ddc7..eb021a0fd2c894 100644 --- a/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll +++ b/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll @@ -42,7 +42,7 @@ define { i8, i1 } @no_fold_on_constant_add_overflow(i8 %x) { define { <2 x i8>, <2 x i1> } @no_fold_vector_no_overflow(<2 x i8> %x) { ; CHECK-LABEL: @no_fold_vector_no_overflow( ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> splat (i8 55)) ; CHECK-NEXT: ret { <2 x i8>, <2 x i1> } [[B]] ; %a = add nuw <2 x i8> %x, @@ -53,7 +53,7 @@ define { <2 x i8>, <2 x i1> } @no_fold_vector_no_overflow(<2 x i8> %x) { define { <2 x i8>, <2 x i1> } @no_fold_vector_overflow(<2 x i8> %x) { ; CHECK-LABEL: @no_fold_vector_overflow( ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> splat (i8 55)) ; CHECK-NEXT: ret { <2 x i8>, <2 x i1> } [[B]] ; %a = add nuw <2 x i8> %x, @@ -63,7 +63,7 @@ define { <2 x i8>, <2 x i1> } @no_fold_vector_overflow(<2 x i8> %x) { define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_constant( -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 42)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nuw <2 x i32> %x, @@ -74,7 +74,7 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { ; CHECK-LABEL: @no_fold_splat_undef_constant( ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nuw <2 x i32> %x, @@ -85,7 +85,7 @@ define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) { define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @no_fold_splat_not_constant( ; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = add nuw <2 x i32> %x, %y @@ -128,7 +128,7 @@ define { i32, i1 } @no_fold_wrapped_add(i32 %x) { define { <2 x i32>, <2 x i1> } @fold_simple_splat_with_disjoint_or_constant(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_with_disjoint_or_constant( -; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 42)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = or disjoint <2 x i32> %x, @@ -139,8 +139,8 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_with_disjoint_or_constant(<2 x define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant_with_or_fail(<2 x i32> %x) { ; CHECK-LABEL: @fold_simple_splat_constant_with_or_fail( -; CHECK-NEXT: [[A:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> ) +; CHECK-NEXT: [[A:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 12) +; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30)) ; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]] ; %a = or <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/uaddo.ll b/llvm/test/Transforms/InstCombine/uaddo.ll index 9b56dce8b45856..ae7a07ec8000c9 100644 --- a/llvm/test/Transforms/InstCombine/uaddo.ll +++ b/llvm/test/Transforms/InstCombine/uaddo.ll @@ -18,7 +18,7 @@ define i32 @uaddo_commute1(i32 %x, i32 %y, i32 %z) { define <2 x i32> @uaddo_commute2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { ; CHECK-LABEL: @uaddo_commute2( -; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 -1) ; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y]], [[X:%.*]] ; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[X]], [[NOTY]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[Z:%.*]], <2 x i32> [[A]] diff --git a/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll b/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll index 824ed4f2b43947..546824680ea131 100644 --- a/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll +++ b/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll @@ -24,7 +24,7 @@ define i64 @test(i64 %X, i1 %Cond ) { define <2 x i32> @PR34856(<2 x i32> %t0, <2 x i32> %t1) { ; CHECK-LABEL: @PR34856( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[T1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[T1:%.*]], splat (i32 -8) ; CHECK-NEXT: [[DIV1:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[DIV1]] ; diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll index 0d87122660cfa1..c8228057eeb10a 100644 --- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll +++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll @@ -108,7 +108,7 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) { define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 64) ; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -298,7 +298,7 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) { define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) { ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined( ; CHECK-SAME: <2 x i32> [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 33554432) ; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll b/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll index 35de89a461fa05..0c490549c0f4ea 100644 --- a/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll +++ b/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll @@ -8,8 +8,8 @@ define <2 x i4> @splat (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @splat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -51,7 +51,7 @@ define <2 x i4> @nonsplat (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @in_constant_varx_mone(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_mone( -; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], splat (i4 -2) ; CHECK-NEXT: ret <2 x i4> [[R1]] ; %n0 = xor <2 x i4> %x, ; %x @@ -62,7 +62,7 @@ define <2 x i4> @in_constant_varx_mone(<2 x i4> %x, <2 x i4> %mask) { define <2 x i4> @in_constant_varx_14(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_14( -; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], splat (i4 -2) ; CHECK-NEXT: ret <2 x i4> [[R1]] ; %n0 = xor <2 x i4> %x, ; %x @@ -73,7 +73,7 @@ define <2 x i4> @in_constant_varx_14(<2 x i4> %x, <2 x i4> %mask) { define <2 x i4> @in_constant_varx_14_nonsplat(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_varx_14_nonsplat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -97,7 +97,7 @@ define <3 x i4> @in_constant_varx_14_undef(<3 x i4> %x, <3 x i4> %mask) { define <2 x i4> @in_constant_mone_vary(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_mone_vary( -; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[Y:%.*]], splat (i4 1) ; CHECK-NEXT: ret <2 x i4> [[R1]] ; %n0 = xor <2 x i4> %y, ; %x @@ -108,7 +108,7 @@ define <2 x i4> @in_constant_mone_vary(<2 x i4> %y, <2 x i4> %mask) { define <2 x i4> @in_constant_14_vary(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_14_vary( -; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %y, ; %x @@ -119,7 +119,7 @@ define <2 x i4> @in_constant_14_vary(<2 x i4> %y, <2 x i4> %mask) { define <2 x i4> @in_constant_14_vary_nonsplat(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @in_constant_14_vary_nonsplat( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -150,8 +150,8 @@ declare <2 x i4> @gen4() define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_1_0_0( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -163,8 +163,8 @@ define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @c_0_1_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_0_1_0( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -178,8 +178,8 @@ define <2 x i4> @c_0_0_1 () { ; CHECK-LABEL: @c_0_0_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -193,8 +193,8 @@ define <2 x i4> @c_0_0_1 () { define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @c_1_1_0( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -207,8 +207,8 @@ define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @c_1_0_1 (<2 x i4> %x) { ; CHECK-LABEL: @c_1_0_1( ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -222,8 +222,8 @@ define <2 x i4> @c_1_0_1 (<2 x i4> %x) { define <2 x i4> @c_0_1_1 (<2 x i4> %y) { ; CHECK-LABEL: @c_0_1_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -238,8 +238,8 @@ define <2 x i4> @c_1_1_1 () { ; CHECK-LABEL: @c_1_1_1( ; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4() ; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4() -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y]], -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y]], splat (i4 -2) +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = or disjoint <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -253,7 +253,7 @@ define <2 x i4> @c_1_1_1 () { define <2 x i4> @commutativity_constant_14_vary(<2 x i4> %y, <2 x i4> %mask) { ; CHECK-LABEL: @commutativity_constant_14_vary( -; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], splat (i4 -2) ; CHECK-NEXT: ret <2 x i4> [[R]] ; %n0 = xor <2 x i4> %y, ; %x @@ -273,7 +273,7 @@ declare void @use4(<2 x i4>) define <2 x i4> @n_oneuse_D (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @n_oneuse_D( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], splat (i4 -2) ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] ; CHECK-NEXT: call void @use4(<2 x i4> [[N0]]) ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -288,7 +288,7 @@ define <2 x i4> @n_oneuse_D (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @n_oneuse_A( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], splat (i4 -2) ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] ; CHECK-NEXT: call void @use4(<2 x i4> [[N1]]) ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -303,7 +303,7 @@ define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y) { define <2 x i4> @n_oneuse_AD (<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @n_oneuse_AD( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], splat (i4 -2) ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]] ; CHECK-NEXT: call void @use4(<2 x i4> [[N0]]) ; CHECK-NEXT: call void @use4(<2 x i4> [[N1]]) @@ -337,7 +337,7 @@ define <2 x i4> @n_var_mask (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { define <2 x i4> @n_differenty(<2 x i4> %x, <2 x i4> %mask) { ; CHECK-LABEL: @n_differenty( ; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], +; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], splat (i4 1) ; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], ; CHECK-NEXT: ret <2 x i4> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll index 17b32670ae9d7b..a25ee985759b8c 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll @@ -26,7 +26,7 @@ define i1 @t0_basic(i8 %x, i8 %y) { define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: call void @use2x8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[T0]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll index 677ef47456c013..be0b0e0da562fb 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll @@ -21,7 +21,7 @@ define i1 @t0_basic(i8 %x, i8 %y) { define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll index 457a0e594b6303..0bd34deb41d5fc 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll @@ -26,7 +26,7 @@ define i1 @t0_basic(i8 %x, i8 %y) { define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: call void @use2x8(<2 x i8> [[T0]]) ; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[T0]] ; CHECK-NEXT: ret <2 x i1> [[R]] diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll index 94966a1eba3289..b4865218dbc143 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll @@ -21,7 +21,7 @@ define i1 @t0_basic(i8 %x, i8 %y) { define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll index 89e065033da19f..5297b3660f8789 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-mul-udiv.ll @@ -23,7 +23,7 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( ; CHECK-NEXT: [[MUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.umul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[MUL]], 1 -; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], +; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[MUL_NOT_OV]] ; %t0 = mul <2 x i8> %x, %y diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll index 241d9cbcde3382..f7f343827e5761 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll @@ -22,7 +22,7 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @t1_vec( ; CHECK-NEXT: [[MUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.umul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[MUL]], 1 -; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], +; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <2 x i1> [[MUL_OV]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[MUL_NOT_OV]] ; %t0 = udiv <2 x i8> , %x @@ -34,7 +34,7 @@ define <3 x i1> @t2_vec_poison(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @t2_vec_poison( ; CHECK-NEXT: [[MUL:%.*]] = call { <3 x i8>, <3 x i1> } @llvm.umul.with.overflow.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <3 x i8>, <3 x i1> } [[MUL]], 1 -; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <3 x i1> [[MUL_OV]], +; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <3 x i1> [[MUL_OV]], splat (i1 true) ; CHECK-NEXT: ret <3 x i1> [[MUL_NOT_OV]] ; %t0 = udiv <3 x i8> , %x diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll index ab147584d2108f..1509a42872922a 100644 --- a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll +++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll @@ -40,7 +40,7 @@ define i32 @usub_sat_C1_C2_produce_0_too(i32 %a){ ; vector tests define <2 x i16> @usub_sat_C1_C2_splat(<2 x i16> %a) { ; CHECK-LABEL: @usub_sat_C1_C2_splat( -; CHECK-NEXT: [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> , <2 x i16> [[A:%.*]]) +; CHECK-NEXT: [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> splat (i16 50), <2 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <2 x i16> [[COND]] ; %add = sub nuw <2 x i16> , %a diff --git a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll index 262942aa1219b8..55bad9a57b8634 100644 --- a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll +++ b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll @@ -187,7 +187,7 @@ define i64 @t3_notrunc_redundant_sext(i64 %data, i64 %nbits) { define <2 x i32> @t4_vec(<2 x i64> %data, <2 x i32> %nbits) { ; CHECK-LABEL: @t4_vec( -; CHECK-NEXT: [[SKIP_HIGH:%.*]] = sub <2 x i32> , [[NBITS:%.*]] +; CHECK-NEXT: [[SKIP_HIGH:%.*]] = sub <2 x i32> splat (i32 64), [[NBITS:%.*]] ; CHECK-NEXT: [[SKIP_HIGH_WIDE:%.*]] = zext nneg <2 x i32> [[SKIP_HIGH]] to <2 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[DATA:%.*]], [[SKIP_HIGH_WIDE]] ; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll index f78a7055ff2f6c..644f13f8eee636 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll @@ -641,7 +641,7 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) { ;; indices. define ptr @PR41624(<2 x ptr> %a) { ; CHECK-LABEL: @PR41624( -; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0 ; CHECK-NEXT: ret ptr [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll index 1fd7903307cef4..a4b55e2b6cfec1 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -644,7 +644,7 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) { ;; indices. define ptr @PR41624(<2 x ptr> %a) { ; CHECK-LABEL: @PR41624( -; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0 ; CHECK-NEXT: ret ptr [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll index c43def83f58aca..ecd5e49bd6b28e 100644 --- a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll @@ -97,7 +97,7 @@ define void @nocopy(i64 %val, i32 %limit, ptr %ptr) { ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] ; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], +; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], splat (i32 16) ; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]] ; CHECK: ret: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll index 84657c9d43896d..ef093709a98f58 100644 --- a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll +++ b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll @@ -97,7 +97,7 @@ define void @nocopy(i64 %val, i32 %limit, ptr %ptr) { ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]] ; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], +; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], splat (i32 16) ; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]] ; CHECK: ret: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll index 9f5f957f494452..925f491d298410 100644 --- a/llvm/test/Transforms/InstCombine/vec_sext.ll +++ b/llvm/test/Transforms/InstCombine/vec_sext.ll @@ -44,7 +44,7 @@ define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) define <2 x i32> @is_negative_poison_elt(<2 x i32> %a) { ; CHECK-LABEL: @is_negative_poison_elt( -; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], splat (i32 31) ; CHECK-NEXT: ret <2 x i32> [[A_LOBIT]] ; %cmp = icmp slt <2 x i32> %a, diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll index 0f233fbb4729e6..80fe4b78fe8a9f 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll @@ -264,7 +264,7 @@ define <3 x i32> @add_wider(i32 %y, i32 %z) { ; CHECK-LABEL: @add_wider( ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], splat (i32 255) ; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; @@ -281,7 +281,7 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) { ; CHECK-LABEL: @div_wider( ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 -; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], +; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], splat (i32 255) ; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; @@ -378,7 +378,7 @@ define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) { define <1 x i32> @test16a(i32 %ele) { ; CHECK-LABEL: @test16a( -; CHECK-NEXT: ret <1 x i32> +; CHECK-NEXT: ret <1 x i32> splat (i32 2) ; %t0 = insertelement <2 x i32> , i32 %ele, i32 1 %t1 = shl <2 x i32> %t0, @@ -388,7 +388,7 @@ define <1 x i32> @test16a(i32 %ele) { define <4 x i8> @test16b(i8 %ele) { ; CHECK-LABEL: @test16b( -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 2) ; %t0 = insertelement <8 x i8> , i8 %ele, i32 6 %t1 = shl <8 x i8> %t0, @@ -702,7 +702,7 @@ define <4 x i16> @widening_shuffle_or(<2 x i16> %v) { define <4 x i32> @shuffle_17add2(<4 x i32> %v) { ; CHECK-LABEL: @shuffle_17add2( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> @@ -936,7 +936,7 @@ define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) { define <2 x i32> @urem_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @urem_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer @@ -958,7 +958,7 @@ define <2 x i32> @urem_splat_constant1(<2 x i32> %x) { define <2 x i32> @srem_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @srem_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer @@ -980,7 +980,7 @@ define <2 x i32> @srem_splat_constant1(<2 x i32> %x) { define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @udiv_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer @@ -1002,7 +1002,7 @@ define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) { define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @sdiv_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer @@ -1703,7 +1703,7 @@ define <3 x i8> @splat_assoc_or(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fdiv( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fdiv reassoc nsz <2 x float> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -1718,7 +1718,7 @@ define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) { define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fadd( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: call void @use(<2 x float> [[A]]) ; CHECK-NEXT: [[R:%.*]] = fadd fast <2 x float> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <2 x float> [[R]] @@ -1735,7 +1735,7 @@ define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) { define <3 x i32> @splat_assoc_and(<4 x i32> %x, <3 x i32> %y) { ; CHECK-LABEL: @splat_assoc_and( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <3 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = and <3 x i32> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = and <3 x i32> [[Y:%.*]], splat (i32 42) ; CHECK-NEXT: [[R:%.*]] = and <3 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <3 x i32> [[R]] ; @@ -1751,7 +1751,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) { ; CHECK-LABEL: @splat_assoc_xor( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <5 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = xor <5 x i32> [[Y:%.*]], [[SPLATX]] -; CHECK-NEXT: [[R:%.*]] = xor <5 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <5 x i32> [[TMP1]], splat (i32 42) ; CHECK-NEXT: ret <5 x i32> [[R]] ; %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <5 x i32> zeroinitializer @@ -1765,7 +1765,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) { define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_mul( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], splat (i32 42) ; CHECK-NEXT: [[R:%.*]] = mul <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 39a9db02eef293..163d9c9557b239 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -254,7 +254,7 @@ define <3 x i32> @add_wider(i32 %y, i32 %z) { ; CHECK-LABEL: @add_wider( ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], +; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], splat (i32 255) ; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; @@ -271,7 +271,7 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) { ; CHECK-LABEL: @div_wider( ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 -; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], +; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], splat (i32 255) ; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; @@ -368,7 +368,7 @@ define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) { define <1 x i32> @test16a(i32 %ele) { ; CHECK-LABEL: @test16a( -; CHECK-NEXT: ret <1 x i32> +; CHECK-NEXT: ret <1 x i32> splat (i32 2) ; %t0 = insertelement <2 x i32> , i32 %ele, i32 1 %t1 = shl <2 x i32> %t0, @@ -378,7 +378,7 @@ define <1 x i32> @test16a(i32 %ele) { define <4 x i8> @test16b(i8 %ele) { ; CHECK-LABEL: @test16b( -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 2) ; %t0 = insertelement <8 x i8> , i8 %ele, i32 6 %t1 = shl <8 x i8> %t0, @@ -696,7 +696,7 @@ define <4 x i16> @widening_shuffle_or(<2 x i16> %v) { define <4 x i32> @shuffle_17add2(<4 x i32> %v) { ; CHECK-LABEL: @shuffle_17add2( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> @@ -930,7 +930,7 @@ define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) { define <2 x i32> @urem_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @urem_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer @@ -952,7 +952,7 @@ define <2 x i32> @urem_splat_constant1(<2 x i32> %x) { define <2 x i32> @srem_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @srem_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer @@ -974,7 +974,7 @@ define <2 x i32> @srem_splat_constant1(<2 x i32> %x) { define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @udiv_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer @@ -996,7 +996,7 @@ define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) { define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) { ; CHECK-LABEL: @sdiv_splat_constant0( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> , [[SPLAT]] +; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> splat (i32 42), [[SPLAT]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer @@ -1708,7 +1708,7 @@ define <3 x i8> @splat_assoc_or(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fdiv( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fdiv reassoc nsz <2 x float> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -1723,7 +1723,7 @@ define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) { define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @splat_assoc_fadd( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: call void @use(<2 x float> [[A]]) ; CHECK-NEXT: [[R:%.*]] = fadd fast <2 x float> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <2 x float> [[R]] @@ -1740,7 +1740,7 @@ define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) { define <3 x i32> @splat_assoc_and(<4 x i32> %x, <3 x i32> %y) { ; CHECK-LABEL: @splat_assoc_and( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <3 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = and <3 x i32> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = and <3 x i32> [[Y:%.*]], splat (i32 42) ; CHECK-NEXT: [[R:%.*]] = and <3 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <3 x i32> [[R]] ; @@ -1756,7 +1756,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) { ; CHECK-LABEL: @splat_assoc_xor( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <5 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = xor <5 x i32> [[Y:%.*]], [[SPLATX]] -; CHECK-NEXT: [[R:%.*]] = xor <5 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = xor <5 x i32> [[TMP1]], splat (i32 42) ; CHECK-NEXT: ret <5 x i32> [[R]] ; %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <5 x i32> zeroinitializer @@ -1770,7 +1770,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) { define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_mul( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], splat (i32 42) ; CHECK-NEXT: [[R:%.*]] = mul <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -2390,7 +2390,7 @@ define <2 x i32> @foldselect0(i1 %c) { ; Make sure we do not crash in this case. define <4 x float> @shuf_larger_length_vec_select(<2 x i1> %cond) { ; CHECK-LABEL: @shuf_larger_length_vec_select( -; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x float> zeroinitializer, <2 x float> +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x float> zeroinitializer, <2 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[SEL]], <2 x float> zeroinitializer, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SHUF]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll b/llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll index 116d531050111f..ea05d05bba9940 100644 --- a/llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll +++ b/llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll @@ -1,16 +1,26 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s define <8 x i16> @udiv_vec8x16(<8 x i16> %var) { +; CHECK-LABEL: define <8 x i16> @udiv_vec8x16( +; CHECK-SAME: <8 x i16> [[VAR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[VAR]], splat (i16 5) +; CHECK-NEXT: ret <8 x i16> [[TMP0]] +; entry: -; CHECK: lshr <8 x i16> %var, %0 = udiv <8 x i16> %var, ret <8 x i16> %0 } define <4 x i32> @udiv_vec4x32(<4 x i32> %var) { +; CHECK-LABEL: define <4 x i32> @udiv_vec4x32( +; CHECK-SAME: <4 x i32> [[VAR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[VAR]], splat (i32 4) +; CHECK-NEXT: ret <4 x i32> [[TMP0]] +; entry: -; CHECK: lshr <4 x i32> %var, %0 = udiv <4 x i32> %var, ret <4 x i32> %0 } diff --git a/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll index a87364600ba308..5770e2ce72020d 100644 --- a/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll @@ -52,8 +52,8 @@ define <2 x i1> @and_cmp_is_trunc_even_with_poison_elts(<2 x i64> %a) { ; The ashr turns into an lshr. define <2 x i64> @test2(<2 x i64> %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[B]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 1) +; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[B]], splat (i64 32767) ; CHECK-NEXT: ret <2 x i64> [[T]] ; %b = and <2 x i64> %a, @@ -151,7 +151,7 @@ define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) { define void @convert(ptr %dst.addr, <2 x i64> %src) { ; CHECK-LABEL: @convert( ; CHECK-NEXT: [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32> -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[ADD]], ptr [[DST_ADDR:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -163,7 +163,7 @@ define void @convert(ptr %dst.addr, <2 x i64> %src) { define <2 x i65> @foo(<2 x i64> %t) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], +; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], splat (i64 4294967295) ; CHECK-NEXT: [[B:%.*]] = zext nneg <2 x i64> [[A_MASK]] to <2 x i65> ; CHECK-NEXT: ret <2 x i65> [[B]] ; @@ -175,7 +175,7 @@ define <2 x i65> @foo(<2 x i64> %t) { define <2 x i64> @bar(<2 x i65> %t) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64> -; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], splat (i64 4294967295) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i65> %t to <2 x i32> @@ -196,8 +196,8 @@ define <2 x i64> @bars(<2 x i65> %t) { define <2 x i64> @quxs(<2 x i64> %t) { ; CHECK-LABEL: @quxs( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i64> %t to <2 x i32> @@ -207,8 +207,8 @@ define <2 x i64> @quxs(<2 x i64> %t) { define <2 x i64> @quxt(<2 x i64> %t) { ; CHECK-LABEL: @quxt( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = shl <2 x i64> %t, @@ -278,7 +278,7 @@ define <4 x float> @f(i32 %a) { define <8 x i32> @pr24458(<8 x float> %n) { ; CHECK-LABEL: @pr24458( -; CHECK-NEXT: ret <8 x i32> +; CHECK-NEXT: ret <8 x i32> splat (i32 -1) ; %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/vector-casts.ll b/llvm/test/Transforms/InstCombine/vector-casts.ll index fd2a4ffdfb7092..56e957d5430789 100644 --- a/llvm/test/Transforms/InstCombine/vector-casts.ll +++ b/llvm/test/Transforms/InstCombine/vector-casts.ll @@ -52,8 +52,8 @@ define <2 x i1> @and_cmp_is_trunc_even_with_poison_elts(<2 x i64> %a) { ; The ashr turns into an lshr. define <2 x i64> @test2(<2 x i64> %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[B]], +; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 1) +; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[B]], splat (i64 32767) ; CHECK-NEXT: ret <2 x i64> [[T]] ; %b = and <2 x i64> %a, @@ -151,7 +151,7 @@ define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) { define void @convert(ptr %dst.addr, <2 x i64> %src) { ; CHECK-LABEL: @convert( ; CHECK-NEXT: [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32> -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[ADD]], ptr [[DST_ADDR:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -163,7 +163,7 @@ define void @convert(ptr %dst.addr, <2 x i64> %src) { define <2 x i65> @foo(<2 x i64> %t) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], +; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], splat (i64 4294967295) ; CHECK-NEXT: [[B:%.*]] = zext nneg <2 x i64> [[A_MASK]] to <2 x i65> ; CHECK-NEXT: ret <2 x i65> [[B]] ; @@ -175,7 +175,7 @@ define <2 x i65> @foo(<2 x i64> %t) { define <2 x i64> @bar(<2 x i65> %t) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64> -; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], splat (i64 4294967295) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i65> %t to <2 x i32> @@ -196,8 +196,8 @@ define <2 x i64> @bars(<2 x i65> %t) { define <2 x i64> @quxs(<2 x i64> %t) { ; CHECK-LABEL: @quxs( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i64> %t to <2 x i32> @@ -207,8 +207,8 @@ define <2 x i64> @quxs(<2 x i64> %t) { define <2 x i64> @quxt(<2 x i64> %t) { ; CHECK-LABEL: @quxt( -; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], +; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32) +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = shl <2 x i64> %t, @@ -278,7 +278,7 @@ define <4 x float> @f(i32 %a) { define <8 x i32> @pr24458(<8 x float> %n) { ; CHECK-LABEL: @pr24458( -; CHECK-NEXT: ret <8 x i32> +; CHECK-NEXT: ret <8 x i32> splat (i32 -1) ; %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/vector-mul.ll b/llvm/test/Transforms/InstCombine/vector-mul.ll index 4640e19011b01e..2b66a8b12bfce7 100644 --- a/llvm/test/Transforms/InstCombine/vector-mul.ll +++ b/llvm/test/Transforms/InstCombine/vector-mul.ll @@ -27,7 +27,7 @@ entry: define <4 x i8> @AddToSelf_i8(<4 x i8> %InVec) { ; CHECK-LABEL: @AddToSelf_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], splat (i8 1) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -38,7 +38,7 @@ entry: define <4 x i8> @SplatPow2Test1_i8(<4 x i8> %InVec) { ; CHECK-LABEL: @SplatPow2Test1_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], splat (i8 2) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -49,7 +49,7 @@ entry: define <4 x i8> @SplatPow2Test2_i8(<4 x i8> %InVec) { ; CHECK-LABEL: @SplatPow2Test2_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], splat (i8 3) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -71,7 +71,7 @@ entry: define <4 x i8> @MulTest2_i8(<4 x i8> %InVec) { ; CHECK-LABEL: @MulTest2_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], splat (i8 3) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -124,7 +124,7 @@ entry: define <4 x i16> @AddToSelf_i16(<4 x i16> %InVec) { ; CHECK-LABEL: @AddToSelf_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -135,7 +135,7 @@ entry: define <4 x i16> @SplatPow2Test1_i16(<4 x i16> %InVec) { ; CHECK-LABEL: @SplatPow2Test1_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], splat (i16 2) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -146,7 +146,7 @@ entry: define <4 x i16> @SplatPow2Test2_i16(<4 x i16> %InVec) { ; CHECK-LABEL: @SplatPow2Test2_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], splat (i16 3) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -168,7 +168,7 @@ entry: define <4 x i16> @MulTest2_i16(<4 x i16> %InVec) { ; CHECK-LABEL: @MulTest2_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], splat (i16 3) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -221,7 +221,7 @@ entry: define <4 x i32> @AddToSelf_i32(<4 x i32> %InVec) { ; CHECK-LABEL: @AddToSelf_i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -232,7 +232,7 @@ entry: define <4 x i32> @SplatPow2Test1_i32(<4 x i32> %InVec) { ; CHECK-LABEL: @SplatPow2Test1_i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], splat (i32 2) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -243,7 +243,7 @@ entry: define <4 x i32> @SplatPow2Test2_i32(<4 x i32> %InVec) { ; CHECK-LABEL: @SplatPow2Test2_i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], splat (i32 3) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -265,7 +265,7 @@ entry: define <4 x i32> @MulTest2_i32(<4 x i32> %InVec) { ; CHECK-LABEL: @MulTest2_i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], splat (i32 3) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -318,7 +318,7 @@ entry: define <4 x i64> @AddToSelf_i64(<4 x i64> %InVec) { ; CHECK-LABEL: @AddToSelf_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], splat (i64 1) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: @@ -329,7 +329,7 @@ entry: define <4 x i64> @SplatPow2Test1_i64(<4 x i64> %InVec) { ; CHECK-LABEL: @SplatPow2Test1_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], splat (i64 2) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: @@ -340,7 +340,7 @@ entry: define <4 x i64> @SplatPow2Test2_i64(<4 x i64> %InVec) { ; CHECK-LABEL: @SplatPow2Test2_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], splat (i64 3) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: @@ -362,7 +362,7 @@ entry: define <4 x i64> @MulTest2_i64(<4 x i64> %InVec) { ; CHECK-LABEL: @MulTest2_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], splat (i64 3) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: @@ -399,7 +399,7 @@ entry: define <4 x i8> @ShiftMulTest1(<4 x i8> %InVec) { ; CHECK-LABEL: @ShiftMulTest1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], splat (i8 12) ; CHECK-NEXT: ret <4 x i8> [[MUL]] ; entry: @@ -411,7 +411,7 @@ entry: define <4 x i16> @ShiftMulTest2(<4 x i16> %InVec) { ; CHECK-LABEL: @ShiftMulTest2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], splat (i16 12) ; CHECK-NEXT: ret <4 x i16> [[MUL]] ; entry: @@ -423,7 +423,7 @@ entry: define <4 x i32> @ShiftMulTest3(<4 x i32> %InVec) { ; CHECK-LABEL: @ShiftMulTest3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], splat (i32 12) ; CHECK-NEXT: ret <4 x i32> [[MUL]] ; entry: @@ -435,7 +435,7 @@ entry: define <4 x i64> @ShiftMulTest4(<4 x i64> %InVec) { ; CHECK-LABEL: @ShiftMulTest4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], splat (i64 12) ; CHECK-NEXT: ret <4 x i64> [[MUL]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll index 65d00083532621..5e141f6aacaf2b 100644 --- a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll +++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll @@ -16,7 +16,7 @@ define i1 @vec_reduce_umax_non_zero(<4 x i8> %xx) { define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) { ; CHECK-LABEL: @vec_reduce_umax_non_zero_fail( -; CHECK-NEXT: [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], splat (i8 1) ; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]]) ; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 ; CHECK-NEXT: ret i1 [[R]] diff --git a/llvm/test/Transforms/InstCombine/vector-trunc.ll b/llvm/test/Transforms/InstCombine/vector-trunc.ll index bccb12e66eba19..85d6de4a3977c2 100644 --- a/llvm/test/Transforms/InstCombine/vector-trunc.ll +++ b/llvm/test/Transforms/InstCombine/vector-trunc.ll @@ -3,9 +3,9 @@ define <4 x i16> @trunc_add_nsw(<4 x i32> %0) { ; CHECK-LABEL: @trunc_add_nsw( -; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], splat (i32 17) ; CHECK-NEXT: [[TMP3:%.*]] = trunc nsw <4 x i32> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[TMP3]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[TMP4]] ; %2 = ashr <4 x i32> %0, @@ -16,9 +16,9 @@ define <4 x i16> @trunc_add_nsw(<4 x i32> %0) { define <4 x i16> @trunc_add_no_nsw(<4 x i32> %0) { ; CHECK-LABEL: @trunc_add_no_nsw( -; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP0:%.*]], splat (i32 16) ; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i32> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP3]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[TMP4]] ; %2 = ashr <4 x i32> %0, @@ -31,7 +31,7 @@ define <4 x i16> @trunc_add_mixed(<4 x i32> %0) { ; CHECK-LABEL: @trunc_add_mixed( ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP3]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[TMP4]] ; %2 = ashr <4 x i32> %0, diff --git a/llvm/test/Transforms/InstCombine/vector-udiv.ll b/llvm/test/Transforms/InstCombine/vector-udiv.ll index 0289b7c70cc4fb..dd4976f12a036a 100644 --- a/llvm/test/Transforms/InstCombine/vector-udiv.ll +++ b/llvm/test/Transforms/InstCombine/vector-udiv.ll @@ -3,7 +3,7 @@ define <4 x i32> @test_v4i32_splatconst_pow2(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_splatconst_pow2( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = udiv <4 x i32> %a0, @@ -22,7 +22,7 @@ define <4 x i32> @test_v4i32_const_pow2(<4 x i32> %a0) { ; X udiv C, where C >= signbit define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_negconstsplat( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A0:%.*]], splat (i32 -4) ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -51,7 +51,7 @@ define <4 x i32> @test_v4i32_negconst_undef(<4 x i32> %a0) { ; X udiv (C1 << N), where C1 is "1< X >> (N+C2) define <4 x i32> @test_v4i32_shl_splatconst_pow2(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @test_v4i32_shl_splatconst_pow2( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -74,7 +74,7 @@ define <4 x i32> @test_v4i32_shl_const_pow2(<4 x i32> %a0, <4 x i32> %a1) { ; X udiv (zext (C1 << N)), where C1 is "1< X >> (N+C2) define <4 x i32> @test_v4i32_zext_shl_splatconst_pow2(<4 x i32> %a0, <4 x i16> %a1) { ; CHECK-LABEL: @test_v4i32_zext_shl_splatconst_pow2( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[A1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[A1:%.*]], splat (i16 2) ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg <4 x i16> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] diff --git a/llvm/test/Transforms/InstCombine/vector-urem.ll b/llvm/test/Transforms/InstCombine/vector-urem.ll index 627789a03ef6ca..d087bbbe06abf6 100644 --- a/llvm/test/Transforms/InstCombine/vector-urem.ll +++ b/llvm/test/Transforms/InstCombine/vector-urem.ll @@ -3,7 +3,7 @@ define <4 x i32> @test_v4i32_splatconst_pow2(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_splatconst_pow2( -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = urem <4 x i32> %a0, @@ -29,7 +29,7 @@ define <4 x i32> @test_v4i32_const_pow2_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_one(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_one( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -39,7 +39,7 @@ define <4 x i32> @test_v4i32_one(<4 x i32> %a0) { define <4 x i32> @test_v4i32_one_poison(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_one_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -50,8 +50,8 @@ define <4 x i32> @test_v4i32_one_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_negconstsplat( ; CHECK-NEXT: [[A0_FR:%.*]] = freeze <4 x i32> [[A0:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], splat (i32 -3) +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], splat (i32 3) ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0_FR]], <4 x i32> [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/vector-xor.ll b/llvm/test/Transforms/InstCombine/vector-xor.ll index 13894ef85b5da8..3b37684df9e36a 100644 --- a/llvm/test/Transforms/InstCombine/vector-xor.ll +++ b/llvm/test/Transforms/InstCombine/vector-xor.ll @@ -33,7 +33,7 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) define <4 x i32> @test_v4i32_xor_bswap_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_bswap_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A0:%.*]], splat (i32 -16777216) ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; @@ -68,7 +68,7 @@ define <4 x i32> @test_v4i32_xor_bswap_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_demorgan_and(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @test_v4i32_demorgan_and( -; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[X:%.*]], [[Y_NOT]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; @@ -82,7 +82,7 @@ define <4 x i32> @test_v4i32_demorgan_and(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @test_v4i32_demorgan_or(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @test_v4i32_demorgan_or( -; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], +; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], [[Y_NOT]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; @@ -120,7 +120,7 @@ define <4 x i32> @test_v4i32_not_ashr_not_poison(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @test_v4i32_not_ashr_negative_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_not_ashr_negative_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> , [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> splat (i32 2), [[A0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = ashr <4 x i32> , %a0 @@ -152,7 +152,7 @@ define <4 x i32> @test_v4i32_not_ashr_negative_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_not_lshr_nonnegative_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_not_lshr_nonnegative_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> , [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> splat (i32 -4), [[A0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = lshr <4 x i32> , %a0 @@ -184,7 +184,7 @@ define <4 x i32> @test_v4i32_not_lshr_nonnegative_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_not_sub_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_not_sub_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], splat (i32 -4) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = sub <4 x i32> , %a0 @@ -216,7 +216,7 @@ define <4 x i32> @test_v4i32_not_sub_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_xor_signmask_sub_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_signmask_sub_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> , [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> splat (i32 -2147483645), [[A0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = sub <4 x i32> , %a0 @@ -227,7 +227,7 @@ define <4 x i32> @test_v4i32_xor_signmask_sub_splatconst(<4 x i32> %a0) { define <4 x i32> @test_v4i32_xor_signmask_sub_const(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_signmask_sub_const( ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> , [[A0:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -2147483648) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = sub <4 x i32> , %a0 @@ -250,7 +250,7 @@ define <4 x i32> @test_v4i32_xor_signmask_sub_const_poison(<4 x i32> %a0) { define <4 x i32> @test_v4i32_xor_signmask_add_splatconst(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_signmask_add_splatconst( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], splat (i32 -2147483645) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = add <4 x i32> , %a0 @@ -261,7 +261,7 @@ define <4 x i32> @test_v4i32_xor_signmask_add_splatconst(<4 x i32> %a0) { define <4 x i32> @test_v4i32_xor_signmask_add_const(<4 x i32> %a0) { ; CHECK-LABEL: @test_v4i32_xor_signmask_add_const( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -2147483648) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = add <4 x i32> , %a0 diff --git a/llvm/test/Transforms/InstCombine/with_overflow.ll b/llvm/test/Transforms/InstCombine/with_overflow.ll index 4e3cb34931d07e..fa810408730e1b 100644 --- a/llvm/test/Transforms/InstCombine/with_overflow.ll +++ b/llvm/test/Transforms/InstCombine/with_overflow.ll @@ -616,7 +616,7 @@ declare { <4 x i8>, <4 x i1> } @llvm.umul.with.overflow.v4i8(<4 x i8>, <4 x i8>) define { <4 x i8>, <4 x i1> } @always_sadd_const_vector() nounwind { ; CHECK-LABEL: @always_sadd_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> , <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 -128), <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.sadd.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -624,7 +624,7 @@ define { <4 x i8>, <4 x i1> } @always_sadd_const_vector() nounwind { define { <4 x i8>, <4 x i1> } @always_uadd_const_vector() nounwind { ; CHECK-LABEL: @always_uadd_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> zeroinitializer, <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> zeroinitializer, <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.uadd.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -632,7 +632,7 @@ define { <4 x i8>, <4 x i1> } @always_uadd_const_vector() nounwind { define { <4 x i8>, <4 x i1> } @always_ssub_const_vector() nounwind { ; CHECK-LABEL: @always_ssub_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> , <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 127), <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.ssub.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -640,7 +640,7 @@ define { <4 x i8>, <4 x i1> } @always_ssub_const_vector() nounwind { define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind { ; CHECK-LABEL: @always_usub_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> , <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 -1), <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -649,7 +649,7 @@ define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind { ; NOTE: LLVM doesn't (yet) detect the multiplication always results in a overflow define { <4 x i8>, <4 x i1> } @always_smul_const_vector() nounwind { ; CHECK-LABEL: @always_smul_const_vector( -; CHECK-NEXT: [[X:%.*]] = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> , <4 x i8> ) +; CHECK-NEXT: [[X:%.*]] = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> splat (i8 127), <4 x i8> splat (i8 3)) ; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } [[X]] ; %x = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> , <4 x i8> ) @@ -658,7 +658,7 @@ define { <4 x i8>, <4 x i1> } @always_smul_const_vector() nounwind { define { <4 x i8>, <4 x i1> } @always_umul_const_vector() nounwind { ; CHECK-LABEL: @always_umul_const_vector( -; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> , <4 x i1> } +; CHECK-NEXT: ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 -3), <4 x i1> splat (i1 true) } ; %x = call { <4 x i8>, <4 x i1> } @llvm.umul.with.overflow.v4i8(<4 x i8> , <4 x i8> ) ret { <4 x i8>, <4 x i1> } %x @@ -781,7 +781,7 @@ define i8 @smul_neg1(i8 %x, ptr %p) { define <4 x i8> @smul_neg1_vec(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @smul_neg1_vec( ; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp eq <4 x i8> [[X]], +; CHECK-NEXT: [[OV:%.*]] = icmp eq <4 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -795,7 +795,7 @@ define <4 x i8> @smul_neg1_vec(<4 x i8> %x, ptr %p) { define <4 x i8> @smul_neg1_vec_poison(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @smul_neg1_vec_poison( ; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp eq <4 x i8> [[X]], +; CHECK-NEXT: [[OV:%.*]] = icmp eq <4 x i8> [[X]], splat (i8 -128) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -838,7 +838,7 @@ define i8 @umul_neg1(i8 %x, ptr %p) { define <4 x i8> @umul_neg1_vec(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @umul_neg1_vec( ; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], splat (i8 1) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -852,7 +852,7 @@ define <4 x i8> @umul_neg1_vec(<4 x i8> %x, ptr %p) { define <4 x i8> @umul_neg1_vec_poison(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @umul_neg1_vec_poison( ; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], splat (i8 1) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -865,8 +865,8 @@ define <4 x i8> @umul_neg1_vec_poison(<4 x i8> %x, ptr %p) { define <4 x i1> @smul_not_neg1_vec(<4 x i8> %x) { ; CHECK-LABEL: @smul_not_neg1_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X:%.*]], -; CHECK-NEXT: [[OV:%.*]] = icmp ult <4 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X:%.*]], splat (i8 -43) +; CHECK-NEXT: [[OV:%.*]] = icmp ult <4 x i8> [[TMP1]], splat (i8 -85) ; CHECK-NEXT: ret <4 x i1> [[OV]] ; %m = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> %x, <4 x i8> ) @@ -945,8 +945,8 @@ define i8 @umul_256(i8 %x, ptr %p) { define <4 x i8> @umul_4_vec_poison(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @umul_4_vec_poison( -; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], -; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], splat (i8 2) +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], splat (i8 63) ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -1035,8 +1035,8 @@ define i8 @smul_128(i8 %x, ptr %p) { define <4 x i8> @smul_2_vec_poison(<4 x i8> %x, ptr %p) { ; CHECK-LABEL: @smul_2_vec_poison( -; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = shl <4 x i8> [[X:%.*]], splat (i8 1) +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X]], splat (i8 64) ; CHECK-NEXT: [[OV:%.*]] = icmp slt <4 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: store <4 x i1> [[OV]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] diff --git a/llvm/test/Transforms/InstCombine/xor-and-or.ll b/llvm/test/Transforms/InstCombine/xor-and-or.ll index feba47912b1dac..47275ce31070b5 100644 --- a/llvm/test/Transforms/InstCombine/xor-and-or.ll +++ b/llvm/test/Transforms/InstCombine/xor-and-or.ll @@ -52,7 +52,7 @@ define i1 @xor_logic_and_logic_or4(i1 %c, i1 %x, i1 %y) { define <3 x i1> @xor_logic_and_logic_or_vector1(<3 x i1> %c, <3 x i1> %x, <3 x i1> %y) { ; CHECK-LABEL: @xor_logic_and_logic_or_vector1( -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -65,7 +65,7 @@ define <3 x i1> @xor_logic_and_logic_or_vector1(<3 x i1> %c, <3 x i1> %x, <3 x i define <3 x i1> @xor_logic_and_logic_or_vector2(<3 x i1> %c, <3 x i1> %x, <3 x i1> %y) { ; CHECK-LABEL: @xor_logic_and_logic_or_vector2( ; CHECK-NEXT: [[TMP1:%.*]] = freeze <3 x i1> [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <3 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <3 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i1> [[TMP2]], <3 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -89,7 +89,7 @@ define <3 x i1> @xor_logic_and_logic_or_vector_poison1(<3 x i1> %c, <3 x i1> %x, define <3 x i1> @xor_logic_and_logic_or_vector_poison2(<3 x i1> %c, <3 x i1> %x, <3 x i1> %y) { ; CHECK-LABEL: @xor_logic_and_logic_or_vector_poison2( -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[C:%.*]], <3 x i1> [[TMP1]], <3 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -125,7 +125,7 @@ define i1 @xor_and_logic_or2(i1 %c, i1 %x, i1 %y) { define <2 x i1> @xor_and_logic_or_vector(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @xor_and_logic_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> [[TMP1]], <2 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -174,7 +174,7 @@ define i1 @xor_logic_and_or2(i1 %c, i1 %x, i1 %y) { define <2 x i1> @xor_logic_and_or_vector(<2 x i1> %c, <2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @xor_logic_and_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i1> [[TMP1]], <2 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; @@ -215,7 +215,7 @@ define i1 @xor_and_or(i1 %c, i1 %x, i1 %y) { ;; and/or/xor on most backend, do we really need to do this transform? define <4 x i1> @xor_and_or_vector(<4 x i1> %c, <4 x i1> %x, <4 x i1> %y) { ; CHECK-LABEL: @xor_and_or_vector( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[X:%.*]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP1]], <4 x i1> [[Y:%.*]] ; CHECK-NEXT: ret <4 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/xor-ashr.ll b/llvm/test/Transforms/InstCombine/xor-ashr.ll index 6f501184b63574..0c0554adcf1230 100644 --- a/llvm/test/Transforms/InstCombine/xor-ashr.ll +++ b/llvm/test/Transforms/InstCombine/xor-ashr.ll @@ -53,8 +53,8 @@ define i128 @testi128i128(i128 %add) { define <4 x i8> @testv4i16i8(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], -; CHECK-NEXT: [[X:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> , <4 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], splat (i16 -1) +; CHECK-NEXT: [[X:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> splat (i8 27), <4 x i8> splat (i8 -28) ; CHECK-NEXT: ret <4 x i8> [[X]] ; %sh = ashr <4 x i16> %add, @@ -65,7 +65,7 @@ define <4 x i8> @testv4i16i8(<4 x i16> %add) { define <4 x i8> @testv4i16i8_poison(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8_poison( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], splat (i16 -1) ; CHECK-NEXT: [[X:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> , <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[X]] ; @@ -94,7 +94,7 @@ define i8 @wrongimm(i16 %add) { define <4 x i32> @vectorpoison(<6 x i32> %0) { ; CHECK-LABEL: @vectorpoison( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <6 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <6 x i32> [[TMP0:%.*]], splat (i32 -1) ; CHECK-NEXT: [[SHR:%.*]] = sext <6 x i1> [[ISNOTNEG]] to <6 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i32> [[SHR]], <6 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll index f104cd7fdcada5..0384c1aa184b82 100644 --- a/llvm/test/Transforms/InstCombine/xor-icmps.ll +++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll @@ -113,7 +113,7 @@ define i1 @slt_zero_sgt_minus1(i4 %x, i4 %y) { define <2 x i1> @sgt_minus1_slt_zero_sgt(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @sgt_minus1_slt_zero_sgt( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i4> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i4> [[TMP1]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %i1 = icmp sgt <2 x i4> %x, diff --git a/llvm/test/Transforms/InstCombine/xor-of-or.ll b/llvm/test/Transforms/InstCombine/xor-of-or.ll index 4f27303dcec302..1be3bc33af85b0 100644 --- a/llvm/test/Transforms/InstCombine/xor-of-or.ll +++ b/llvm/test/Transforms/InstCombine/xor-of-or.ll @@ -42,8 +42,8 @@ define i4 @t2(i4 %x) { ; Splat constants are fine too. define <2 x i4> @t3(<2 x i4> %x) { ; CHECK-LABEL: @t3( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[I1:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], splat (i4 3) +; CHECK-NEXT: [[I1:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 6) ; CHECK-NEXT: ret <2 x i4> [[I1]] ; %i0 = or <2 x i4> %x, @@ -55,7 +55,7 @@ define <2 x i4> @t3(<2 x i4> %x) { define <2 x i4> @t4(<2 x i4> %x) { ; CHECK-LABEL: @t4( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], -; CHECK-NEXT: [[I1:%.*]] = xor <2 x i4> [[TMP1]], +; CHECK-NEXT: [[I1:%.*]] = xor <2 x i4> [[TMP1]], splat (i4 6) ; CHECK-NEXT: ret <2 x i4> [[I1]] ; %i0 = or <2 x i4> %x, diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll index 8308dc503d4da2..3abaf74285cc05 100644 --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -111,7 +111,7 @@ define i1 @test9(i8 %A) { define <2 x i1> @test9vec(<2 x i8> %a) { ; CHECK-LABEL: @test9vec( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], splat (i8 89) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = xor <2 x i8> %a, @@ -153,7 +153,7 @@ define i1 @test12(i8 %A) { define <2 x i1> @test12vec(<2 x i8> %a) { ; CHECK-LABEL: @test12vec( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 4) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %b = xor <2 x i8> %a, @@ -222,7 +222,7 @@ define i32 @fold_zext_xor_sandwich(i1 %X) { define <2 x i32> @fold_zext_xor_sandwich_vec(<2 x i1> %X) { ; CHECK-LABEL: @fold_zext_xor_sandwich_vec( ; CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[Q:%.*]] = xor <2 x i32> [[Z]], +; CHECK-NEXT: [[Q:%.*]] = xor <2 x i32> [[Z]], splat (i32 3) ; CHECK-NEXT: ret <2 x i32> [[Q]] ; %Y = xor <2 x i1> %X, @@ -287,7 +287,7 @@ define i32 @test28(i32 %indvar) { define <2 x i32> @test28vec(<2 x i32> %indvar) { ; CHECK-LABEL: @test28vec( -; CHECK-NEXT: [[T214:%.*]] = add <2 x i32> [[INDVAR:%.*]], +; CHECK-NEXT: [[T214:%.*]] = add <2 x i32> [[INDVAR:%.*]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[T214]] ; %t7 = add <2 x i32> %indvar, @@ -307,7 +307,7 @@ define i32 @test28_sub(i32 %indvar) { define <2 x i32> @test28_subvec(<2 x i32> %indvar) { ; CHECK-LABEL: @test28_subvec( -; CHECK-NEXT: [[T214:%.*]] = sub <2 x i32> , [[INDVAR:%.*]] +; CHECK-NEXT: [[T214:%.*]] = sub <2 x i32> splat (i32 1), [[INDVAR:%.*]] ; CHECK-NEXT: ret <2 x i32> [[T214]] ; %t7 = sub <2 x i32> , %indvar @@ -327,7 +327,7 @@ define i32 @test29(i1 %C) { define <2 x i32> @test29vec(i1 %C) { ; CHECK-LABEL: @test29vec( -; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 915), <2 x i32> splat (i32 113) ; CHECK-NEXT: ret <2 x i32> [[V]] ; %A = select i1 %C, <2 x i32> , <2 x i32> @@ -374,7 +374,7 @@ define <2 x i32> @test30vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 915), [[ENTRY:%.*]] ], [ splat (i32 113), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[A]] ; entry: @@ -759,7 +759,7 @@ define i32 @test45(i32 %x, i32 %y) { ; Check that we work with splat vectors also. define <4 x i32> @test46(<4 x i32> %x) { ; CHECK-LABEL: @test46( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 255)) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = xor <4 x i32> %x, @@ -803,8 +803,8 @@ define i32 @test48(i32 %x) { define <2 x i32> @test48vec(<2 x i32> %x) { ; CHECK-LABEL: @test48vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[D:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 1) +; CHECK-NEXT: [[D:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1)) ; CHECK-NEXT: ret <2 x i32> [[D]] ; %a = sub <2 x i32> , %x @@ -829,7 +829,7 @@ define i32 @test49(i32 %x) { define <2 x i32> @test49vec(<2 x i32> %x) { ; CHECK-LABEL: @test49vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> splat (i32 1), [[X:%.*]] ; CHECK-NEXT: [[D:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> zeroinitializer) ; CHECK-NEXT: ret <2 x i32> [[D]] ; @@ -857,8 +857,8 @@ define i32 @test50(i32 %x, i32 %y) { define <2 x i32> @test50vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test50vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> splat (i32 1), [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 1) ; CHECK-NEXT: [[E:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) ; CHECK-NEXT: ret <2 x i32> [[E]] ; @@ -887,8 +887,8 @@ define i32 @test51(i32 %x, i32 %y) { define <2 x i32> @test51vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @test51vec( -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> splat (i32 -3), [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 -3) ; CHECK-NEXT: [[E:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) ; CHECK-NEXT: ret <2 x i32> [[E]] ; @@ -941,7 +941,7 @@ define i4 @or_or_xor_commute2(i4 %x, i4 %y, i4 %z) { define <2 x i4> @or_or_xor_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { ; CHECK-LABEL: @or_or_xor_commute3( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[Z:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[Z:%.*]], splat (i4 -1) ; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret <2 x i4> [[R]] @@ -1011,8 +1011,8 @@ define i8 @not_shl(i8 %x) { define <2 x i8> @not_shl_vec(<2 x i8> %x) { ; CHECK-LABEL: @not_shl_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shl <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1) +; CHECK-NEXT: [[R:%.*]] = shl <2 x i8> [[TMP1]], splat (i8 5) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %a = shl <2 x i8> %x, @@ -1061,7 +1061,7 @@ define i8 @not_lshr(i8 %x) { define <2 x i8> @not_lshr_vec(<2 x i8> %x) { ; CHECK-LABEL: @not_lshr_vec( -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[ISNOTNEG]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1124,7 +1124,7 @@ define i8 @not_ashr(i8 %x) { define <2 x i8> @not_ashr_vec(<2 x i8> %x) { ; CHECK-LABEL: @not_ashr_vec( -; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1) ; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[ISNOTNEG]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; @@ -1222,7 +1222,7 @@ define i32 @xor_andn_commute4(i32 %pa, i32 %pb) { define <2 x i64> @xor_orn(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @xor_orn( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[Z:%.*]] = xor <2 x i64> [[TMP1]], +; CHECK-NEXT: [[Z:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1) ; CHECK-NEXT: ret <2 x i64> [[Z]] ; %nota = xor <2 x i64> %a, @@ -1350,7 +1350,7 @@ define i32 @ctlz_pow2(i32 %x) { define <2 x i8> @cttz_pow2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @cttz_pow2( -; CHECK-NEXT: [[S:%.*]] = shl nuw <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = shl nuw <2 x i8> splat (i8 1), [[X:%.*]] ; CHECK-NEXT: [[D:%.*]] = udiv exact <2 x i8> [[S]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[D]], i1 true) ; CHECK-NEXT: ret <2 x i8> [[R]] @@ -1561,10 +1561,10 @@ entry: define <2 x i32> @select_or_disjoint_xor_vec(<2 x i32> %a, i1 %c) { ; CHECK-LABEL: @select_or_disjoint_xor_vec( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 4) +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 4) ; CHECK-NEXT: [[OR:%.*]] = or disjoint <2 x i32> [[S]], [[SHL]] -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[OR]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[OR]], splat (i32 4) ; CHECK-NEXT: ret <2 x i32> [[XOR]] ; entry: @@ -1595,10 +1595,10 @@ entry: define <2 x i32> @select_or_disjoint_or_vec(<2 x i32> %a, i1 %c) { ; CHECK-LABEL: @select_or_disjoint_or_vec( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 4) +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 4) ; CHECK-NEXT: [[OR:%.*]] = or disjoint <2 x i32> [[S]], [[SHL]] -; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <2 x i32> [[OR]], +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <2 x i32> [[OR]], splat (i32 4) ; CHECK-NEXT: ret <2 x i32> [[ADD]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/xor2.ll b/llvm/test/Transforms/InstCombine/xor2.ll index 0b4fca76ed0a7f..641f32c6529e62 100644 --- a/llvm/test/Transforms/InstCombine/xor2.ll +++ b/llvm/test/Transforms/InstCombine/xor2.ll @@ -586,7 +586,7 @@ define <3 x i5> @not_xor_to_or_not_vector(<3 x i5> %a, <3 x i5> %b, <3 x i5> %c) ; CHECK-LABEL: @not_xor_to_or_not_vector( ; CHECK-NEXT: [[OR:%.*]] = or <3 x i5> [[B:%.*]], [[C:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <3 x i5> [[A:%.*]], [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], splat (i5 -1) ; CHECK-NEXT: [[NOT:%.*]] = or <3 x i5> [[AND]], [[TMP1]] ; CHECK-NEXT: ret <3 x i5> [[NOT]] ; @@ -601,7 +601,7 @@ define <3 x i5> @not_xor_to_or_not_vector_poison(<3 x i5> %a, <3 x i5> %b, <3 x ; CHECK-LABEL: @not_xor_to_or_not_vector_poison( ; CHECK-NEXT: [[OR:%.*]] = or <3 x i5> [[B:%.*]], [[C:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <3 x i5> [[A:%.*]], [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], splat (i5 -1) ; CHECK-NEXT: [[NOT:%.*]] = or <3 x i5> [[AND]], [[TMP1]] ; CHECK-NEXT: ret <3 x i5> [[NOT]] ; @@ -697,7 +697,7 @@ define <3 x i5> @xor_notand_to_or_not_vector(<3 x i5> %a, <3 x i5> %b, <3 x i5> ; CHECK-LABEL: @xor_notand_to_or_not_vector( ; CHECK-NEXT: [[OR:%.*]] = or <3 x i5> [[B:%.*]], [[C:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <3 x i5> [[A:%.*]], [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], splat (i5 -1) ; CHECK-NEXT: [[XOR:%.*]] = or <3 x i5> [[AND]], [[TMP1]] ; CHECK-NEXT: ret <3 x i5> [[XOR]] ; @@ -712,7 +712,7 @@ define <3 x i5> @xor_notand_to_or_not_vector_poison(<3 x i5> %a, <3 x i5> %b, <3 ; CHECK-LABEL: @xor_notand_to_or_not_vector_poison( ; CHECK-NEXT: [[OR:%.*]] = or <3 x i5> [[B:%.*]], [[C:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <3 x i5> [[A:%.*]], [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i5> [[OR]], splat (i5 -1) ; CHECK-NEXT: [[XOR:%.*]] = or <3 x i5> [[AND]], [[TMP1]] ; CHECK-NEXT: ret <3 x i5> [[XOR]] ; diff --git a/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll index c9da18d3d88bdb..271c303664b720 100644 --- a/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll @@ -73,7 +73,7 @@ define i32 @zext_add_scalar(i1 %x) { define <2 x i32> @zext_add_vec_splat(<2 x i1> %x) { ; CHECK-LABEL: @zext_add_vec_splat( -; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> splat (i32 43), <2 x i32> splat (i32 42) ; CHECK-NEXT: ret <2 x i32> [[ADD]] ; %zext = zext <2 x i1> %x to <2 x i32> diff --git a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll index 384ff8d2b7a3a3..97412d6ad5f8de 100644 --- a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll +++ b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll @@ -28,7 +28,7 @@ define i16 @trunc_ctlz_zext_i16_i32(i16 %x) { define <2 x i8> @trunc_ctlz_zext_v2i8_v2i33(<2 x i8> %x) { ; CHECK-LABEL: @trunc_ctlz_zext_v2i8_v2i33( ; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true) -; CHECK-NEXT: [[ZZ:%.*]] = add nuw nsw <2 x i8> [[TMP1]], +; CHECK-NEXT: [[ZZ:%.*]] = add nuw nsw <2 x i8> [[TMP1]], splat (i8 25) ; CHECK-NEXT: ret <2 x i8> [[ZZ]] ; %z = zext <2 x i8> %x to <2 x i33> diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll index 872871cf15b033..e4d18e93952197 100644 --- a/llvm/test/Transforms/InstCombine/zext.ll +++ b/llvm/test/Transforms/InstCombine/zext.ll @@ -20,7 +20,7 @@ define i64 @test_sext_zext(i16 %A) { define <2 x i64> @test2(<2 x i1> %A) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i1> [[XOR]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ZEXT]] ; @@ -67,7 +67,7 @@ define i64 @fold_xor_zext_sandwich(i1 %a) { define <2 x i64> @fold_xor_zext_sandwich_vec(<2 x i1> %a) { ; CHECK-LABEL: @fold_xor_zext_sandwich_vec( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: [[ZEXT2:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[ZEXT2]] ; @@ -194,9 +194,9 @@ define i32 @masked_bit_set(i32 %x, i32 %y) { define <2 x i32> @masked_bit_clear(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @masked_bit_clear( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP2]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %sh1 = shl <2 x i32> , %y @@ -210,7 +210,7 @@ define <2 x i32> @masked_bit_set_commute(<2 x i32> %px, <2 x i32> %y) { ; CHECK-LABEL: @masked_bit_set_commute( ; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> , [[PX:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %x = srem <2 x i32> , %px ; thwart complexity-based canonicalization @@ -471,10 +471,10 @@ define i16 @zext_masked_bit_zero_to_smaller_bitwidth(i32 %a, i32 %b) { define <4 x i16> @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[Z:%.*]] = and <4 x i16> [[TMP3]], +; CHECK-NEXT: [[Z:%.*]] = and <4 x i16> [[TMP3]], splat (i16 1) ; CHECK-NEXT: ret <4 x i16> [[Z]] ; %shl = shl <4 x i32> , %b @@ -550,9 +550,9 @@ define i64 @zext_masked_bit_zero_to_larger_bitwidth(i32 %a, i32 %b) { define <4 x i64> @zext_masked_bit_zero_to_larger_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @zext_masked_bit_zero_to_larger_bitwidth_v4i32( -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[Z:%.*]] = zext nneg <4 x i32> [[TMP3]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[Z]] ; @@ -576,7 +576,7 @@ define i32 @notneg_zext_wider(i8 %x) { define <2 x i8> @notneg_zext_narrower(<2 x i32> %x) { ; CHECK-LABEL: @notneg_zext_narrower( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[CMP]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll b/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll index a296aca65bbf68..9c364d16badee7 100644 --- a/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll +++ b/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll @@ -7,12 +7,12 @@ define void @test1() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: invoke void @bar() -; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[LPAD:%.*]] ; CHECK: cont: ; CHECK-NEXT: ret void ; CHECK: lpad: ; CHECK-NEXT: [[EX:%.*]] = landingpad { ptr, i32 } -; CHECK-NEXT: cleanup +; CHECK-NEXT: cleanup ; CHECK-NEXT: resume { ptr, i32 } [[EX]] ; entry: @@ -53,7 +53,7 @@ define i32 @test3(i32 %a, float %b) { define i8 @test4(<8 x i8> %V) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[ADD:%.*]] = add <8 x i8> [[V:%.*]], bitcast (<1 x double> to <8 x i8>) +; CHECK-NEXT: [[ADD:%.*]] = add <8 x i8> [[V:%.*]], bitcast (<1 x double> splat (double 0x319BEB8FD172E36) to <8 x i8>) ; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <8 x i8> [[ADD]], i32 6 ; CHECK-NEXT: ret i8 [[EXTRACT]] ; diff --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll b/llvm/test/Transforms/InstSimplify/AndOrXor.ll index 2e3a6052242038..814439a7b45348 100644 --- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll +++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll @@ -286,7 +286,7 @@ define i1 @or_of_icmps0(i32 %b) { define <2 x i1> @or_of_icmps0_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps0_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add <2 x i32> %b, %2 = icmp uge <2 x i32> %1, @@ -308,7 +308,7 @@ define i1 @or_of_icmps1(i32 %b) { define <2 x i1> @or_of_icmps1_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps1_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add nsw <2 x i32> %b, %2 = icmp sge <2 x i32> %1, @@ -330,7 +330,7 @@ define i1 @or_of_icmps2(i32 %b) { define <2 x i1> @or_of_icmps2_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps2_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add <2 x i32> %b, %2 = icmp ugt <2 x i32> %1, @@ -352,7 +352,7 @@ define i1 @or_of_icmps3(i32 %b) { define <2 x i1> @or_of_icmps3_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps3_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add nsw <2 x i32> %b, %2 = icmp sgt <2 x i32> %1, @@ -374,7 +374,7 @@ define i1 @or_of_icmps4(i32 %b) { define <2 x i1> @or_of_icmps4_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps4_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add nuw <2 x i32> %b, %2 = icmp uge <2 x i32> %1, @@ -396,7 +396,7 @@ define i1 @or_of_icmps5(i32 %b) { define <2 x i1> @or_of_icmps5_vec(<2 x i32> %b) { ; CHECK-LABEL: @or_of_icmps5_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %1 = add nuw <2 x i32> %b, %2 = icmp ugt <2 x i32> %1, @@ -492,7 +492,7 @@ define <2 x i3> @and_of_different_cast_icmps_vec(<2 x i8> %i, <2 x i16> %j) { ; CHECK-LABEL: @and_of_different_cast_icmps_vec( ; CHECK-NEXT: [[CMP0:%.*]] = icmp eq <2 x i8> [[I:%.*]], zeroinitializer ; CHECK-NEXT: [[CONV0:%.*]] = zext <2 x i1> [[CMP0]] to <2 x i3> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i16> [[J:%.*]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i16> [[J:%.*]], splat (i16 1) ; CHECK-NEXT: [[CONV1:%.*]] = zext <2 x i1> [[CMP1]] to <2 x i3> ; CHECK-NEXT: [[AND:%.*]] = and <2 x i3> [[CONV0]], [[CONV1]] ; CHECK-NEXT: ret <2 x i3> [[AND]] @@ -680,7 +680,7 @@ define i3 @or_xorn_and_commute1(i3 %a, i3 %b) { define <2 x i32> @or_xorn_and_commute2(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @or_xorn_and_commute2( -; CHECK-NEXT: [[NEGA:%.*]] = xor <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[NEGA:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1) ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[B:%.*]], [[NEGA]] ; CHECK-NEXT: ret <2 x i32> [[XOR]] ; @@ -800,7 +800,7 @@ define i8 @lshr_perfect_mask(i8 %x) { define <2 x i8> @lshr_oversized_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @lshr_oversized_mask_splat( -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 5) ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %sh = lshr <2 x i8> %x, @@ -821,7 +821,7 @@ define i8 @lshr_undersized_mask(i8 %x) { define <2 x i8> @shl_perfect_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @shl_perfect_mask_splat( -; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 6) ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %sh = shl <2 x i8> %x, @@ -841,8 +841,8 @@ define i8 @shl_oversized_mask(i8 %x) { define <2 x i8> @shl_undersized_mask_splat(<2 x i8> %x) { ; CHECK-LABEL: @shl_undersized_mask_splat( -; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[MASK:%.*]] = and <2 x i8> [[SH]], +; CHECK-NEXT: [[SH:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 6) +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i8> [[SH]], splat (i8 -120) ; CHECK-NEXT: ret <2 x i8> [[MASK]] ; %sh = shl <2 x i8> %x, @@ -998,7 +998,7 @@ define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) { define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) { ; CHECK-LABEL: @shl_or_and2v( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i64> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i64> [[T1]], splat (i64 32) ; CHECK-NEXT: ret <2 x i64> [[T3]] ; %t1 = zext <2 x i1> %b to <2 x i64> @@ -1014,9 +1014,9 @@ define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) { ; CHECK-LABEL: @shl_or_and3v( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i32> [[T1]], splat (i32 16) ; CHECK-NEXT: [[T4:%.*]] = or <2 x i32> [[T2]], [[T3]] -; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], +; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], splat (i32 -65535) ; CHECK-NEXT: ret <2 x i32> [[T5]] ; %t1 = zext <2 x i16> %a to <2 x i32> @@ -1059,7 +1059,7 @@ define i89 @or_add_sub(i89 %x) { define <3 x i8> @or_sub_add(<3 x i8> %x) { ; CHECK-LABEL: @or_sub_add( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> splat (i8 -1) ; %a = add <3 x i8> %x, %s = sub <3 x i8> , %x @@ -1070,7 +1070,7 @@ define <3 x i8> @or_sub_add(<3 x i8> %x) { define <2 x i17> @xor_add_sub(<2 x i17> %x) { ; CHECK-LABEL: @xor_add_sub( -; CHECK-NEXT: ret <2 x i17> +; CHECK-NEXT: ret <2 x i17> splat (i17 -1) ; %a = add <2 x i17> %x, %s = sub <2 x i17> , %x diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/ARM/mve-vctp.ll b/llvm/test/Transforms/InstSimplify/ConstProp/ARM/mve-vctp.ll index 979fb2e84ba924..530547b79fabeb 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/ARM/mve-vctp.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/ARM/mve-vctp.ll @@ -46,7 +46,7 @@ entry: define <16 x i1> @vctp8_16() { ; CHECK-LABEL: @vctp8_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 16) @@ -56,7 +56,7 @@ entry: define <16 x i1> @vctp8_100() { ; CHECK-LABEL: @vctp8_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 100) @@ -66,7 +66,7 @@ entry: define <16 x i1> @vctp8_m1() { ; CHECK-LABEL: @vctp8_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1) @@ -118,7 +118,7 @@ entry: define <8 x i1> @vctp16_8() { ; CHECK-LABEL: @vctp16_8( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 8) @@ -128,7 +128,7 @@ entry: define <8 x i1> @vctp16_100() { ; CHECK-LABEL: @vctp16_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 100) @@ -138,7 +138,7 @@ entry: define <8 x i1> @vctp16_m1() { ; CHECK-LABEL: @vctp16_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1) @@ -180,7 +180,7 @@ entry: define <4 x i1> @vctp32_4() { ; CHECK-LABEL: @vctp32_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 4) @@ -190,7 +190,7 @@ entry: define <4 x i1> @vctp32_100() { ; CHECK-LABEL: @vctp32_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 100) @@ -200,7 +200,7 @@ entry: define <4 x i1> @vctp32_m1() { ; CHECK-LABEL: @vctp32_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1) @@ -232,7 +232,7 @@ entry: define <2 x i1> @vctp64_2() { ; CHECK-LABEL: @vctp64_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; entry: %int = call <2 x i1> @llvm.arm.mve.vctp64(i32 2) @@ -242,7 +242,7 @@ entry: define <2 x i1> @vctp64_100() { ; CHECK-LABEL: @vctp64_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; entry: %int = call <2 x i1> @llvm.arm.mve.vctp64(i32 100) @@ -252,7 +252,7 @@ entry: define <2 x i1> @vctp64_m1() { ; CHECK-LABEL: @vctp64_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; entry: %int = call <2 x i1> @llvm.arm.mve.vctp64(i32 -1) @@ -260,6 +260,12 @@ entry: } define <4 x float> @poisonc(<4 x float> %a) { +; CHECK-LABEL: @poisonc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR27:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 poison) +; CHECK-NEXT: [[VAR33:%.*]] = select <4 x i1> [[VAR27]], <4 x float> [[A:%.*]], <4 x float> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[VAR33]] +; entry: %new0 = shl i1 0, 1 %last = zext i1 %new0 to i32 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll index 4a879e837f8354..a904e697cc9757 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll @@ -46,7 +46,7 @@ entry: define <16 x i1> @v16i1_16() { ; CHECK-LABEL: @v16i1_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 16) @@ -56,7 +56,7 @@ entry: define <16 x i1> @v16i1_100() { ; CHECK-LABEL: @v16i1_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 100) @@ -66,7 +66,7 @@ entry: define <16 x i1> @v16i1_m1() { ; CHECK-LABEL: @v16i1_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <16 x i1> +; CHECK-NEXT: ret <16 x i1> splat (i1 true) ; entry: %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 -1) @@ -138,7 +138,7 @@ entry: define <8 x i1> @v8i1_8() { ; CHECK-LABEL: @v8i1_8( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 8) @@ -148,7 +148,7 @@ entry: define <8 x i1> @v8i1_100() { ; CHECK-LABEL: @v8i1_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 100) @@ -158,7 +158,7 @@ entry: define <8 x i1> @v8i1_m1() { ; CHECK-LABEL: @v8i1_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) ; entry: %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 -1) @@ -220,7 +220,7 @@ entry: define <4 x i1> @v4i1_4() { ; CHECK-LABEL: @v4i1_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4) @@ -230,7 +230,7 @@ entry: define <4 x i1> @v4i1_100() { ; CHECK-LABEL: @v4i1_100( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 100) @@ -240,7 +240,7 @@ entry: define <4 x i1> @v4i1_m1() { ; CHECK-LABEL: @v4i1_m1( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; entry: %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 -1) @@ -293,6 +293,12 @@ entry: define <4 x float> @poisonc(<4 x float> %a, i32 %n) { +; CHECK-LABEL: @poisonc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR27:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 1024) +; CHECK-NEXT: [[VAR33:%.*]] = select <4 x i1> [[VAR27]], <4 x float> [[A:%.*]], <4 x float> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[VAR33]] +; entry: %new0 = shl i1 0, 1 %last = zext i1 %new0 to i32 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll index 9c75b66db50cc7..a21124eaad6c57 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll @@ -4,7 +4,7 @@ define <1 x i64> @test1() { ; CHECK-LABEL: @test1( -; CHECK-NEXT: ret <1 x i64> +; CHECK-NEXT: ret <1 x i64> splat (i64 63) ; %A = bitcast i64 63 to <1 x i64> ret <1 x i64> %A diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll index a2ed8cd6178d86..b51f061d7918c3 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll @@ -57,7 +57,7 @@ define float @nan_f64_trunc() { define <3 x half> @nan_v3f64_trunc() { ; CHECK-LABEL: @nan_v3f64_trunc( -; CHECK-NEXT: ret <3 x half> +; CHECK-NEXT: ret <3 x half> splat (half 0xH7E00) ; %f = fptrunc <3 x double> to <3 x half> ret <3 x half> %f diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll b/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll index deeb238bdd0e4e..fde98c43e7687d 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll @@ -61,7 +61,7 @@ define <2 x i8> @test_uadd_vector_no_sat(<2 x i8> %a) { define <2 x i8> @test_uadd_vector_sat(<2 x i8> %a) { ; CHECK-LABEL: @test_uadd_vector_sat( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -77,7 +77,7 @@ define <2 x i8> @test_sadd_vector_no_sat(<2 x i8> %a) { define <2 x i8> @test_sadd_vector_sat_pos(<2 x i8> %a) { ; CHECK-LABEL: @test_sadd_vector_sat_pos( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 127) ; %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -85,7 +85,7 @@ define <2 x i8> @test_sadd_vector_sat_pos(<2 x i8> %a) { define <2 x i8> @test_sadd_vector_sat_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_sadd_vector_sat_neg( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -128) ; %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -157,7 +157,7 @@ define <2 x i8> @test_ssub_vector_no_sat(<2 x i8> %a) { define <2 x i8> @test_ssub_vector_sat_pos(<2 x i8> %a) { ; CHECK-LABEL: @test_ssub_vector_sat_pos( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 127) ; %x = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -165,7 +165,7 @@ define <2 x i8> @test_ssub_vector_sat_pos(<2 x i8> %a) { define <2 x i8> @test_ssub_vector_sat_neg(<2 x i8> %a) { ; CHECK-LABEL: @test_ssub_vector_sat_neg( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -128) ; %x = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -271,7 +271,7 @@ define <2 x i8> @test_ssub_vector_both_undef_splat() { define <2 x i8> @test_uadd_vector_op2_undef_splat() { ; CHECK-LABEL: @test_uadd_vector_op2_undef_splat( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> , <2 x i8> undef) ret <2 x i8> %x @@ -279,7 +279,7 @@ define <2 x i8> @test_uadd_vector_op2_undef_splat() { define <2 x i8> @test_sadd_vector_op1_undef_splat() { ; CHECK-LABEL: @test_sadd_vector_op1_undef_splat( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> ) ret <2 x i8> %x @@ -311,7 +311,7 @@ define <2 x i8> @test_uadd_vector_op2_undef_mix1() { define <2 x i8> @test_uadd_vector_op2_undef_mix2() { ; CHECK-LABEL: @test_uadd_vector_op2_undef_mix2( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x @@ -327,7 +327,7 @@ define <2 x i8> @test_sadd_vector_op1_undef_mix1() { define <2 x i8> @test_sadd_vector_op1_undef_mix2() { ; CHECK-LABEL: @test_sadd_vector_op1_undef_mix2( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> , <2 x i8> ) ret <2 x i8> %x diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll index 2be619828e9ea1..4aed1c5dd7d2a1 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll @@ -64,7 +64,7 @@ define <2 x ptr> @constant_undef_index() { define <2 x ptr> @constant_inbounds() { ; CHECK-LABEL: define <2 x ptr> @constant_inbounds() { -; CHECK-NEXT: ret <2 x ptr> getelementptr (i8, ptr @g, <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> getelementptr (i8, ptr @g, <2 x i64> splat (i64 1)) ; %gep = getelementptr i8, ptr @g, <2 x i64> ret <2 x ptr> %gep diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll index be93d6f9d8be31..c37dbd6d3a3507 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector-inseltpoison.ll @@ -28,7 +28,7 @@ define @vscale_version() { ; The non-scalable version should be optimized as normal. ; CHECK-LABEL: define <8 x i1> @fixed_length_version() { -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) define <8 x i1> @fixed_length_version() { %splatter = insertelement <8 x i1> poison, i1 true, i32 0 %foo = shufflevector <8 x i1> %splatter, diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll index a2f50a637531a9..b575e656f552f1 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-shufflevector.ll @@ -28,7 +28,7 @@ define @vscale_version() { ; The non-scalable version should be optimized as normal. ; CHECK-LABEL: define <8 x i1> @fixed_length_version() { -; CHECK-NEXT: ret <8 x i1> +; CHECK-NEXT: ret <8 x i1> splat (i1 true) define <8 x i1> @fixed_length_version() { %splatter = insertelement <8 x i1> undef, i1 true, i32 0 %foo = shufflevector <8 x i1> %splatter, diff --git a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll index 4153872c0adc23..97390ceaad396f 100644 --- a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll +++ b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll @@ -64,7 +64,7 @@ define i32 @zext_abs(i31 %x) { define <3 x i82> @lshr_abs(<3 x i82> %x) { ; CHECK-LABEL: @lshr_abs( -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], splat (i82 1) ; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[LSHR]], i1 true) ; CHECK-NEXT: ret <3 x i82> [[ABS]] ; @@ -121,7 +121,7 @@ define i1 @abs_nsw_must_be_positive(i32 %x) { define <4 x i1> @abs_nsw_must_be_positive_vec(<4 x i32> %x) { ; CHECK-LABEL: @abs_nsw_must_be_positive_vec( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) %c2 = icmp sge <4 x i32> %abs, zeroinitializer @@ -163,7 +163,7 @@ define i1 @abs_known_positive_input_compare(i31 %x) { define <4 x i1> @abs_known_positive_input_compare_vec(<4 x i31> %x) { ; CHECK-LABEL: @abs_known_positive_input_compare_vec( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %zext = zext <4 x i31> %x to <4 x i32> %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %zext, i1 false) @@ -183,7 +183,7 @@ define i1 @abs_known_not_int_min(i32 %x) { define <4 x i1> @abs_known_not_int_min_vec(<4 x i32> %x) { ; CHECK-LABEL: @abs_known_not_int_min_vec( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %or = or <4 x i32> %x, %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %or, i1 false) diff --git a/llvm/test/Transforms/InstSimplify/add_vp.ll b/llvm/test/Transforms/InstSimplify/add_vp.ll index 327d45c4f24cc5..320f3fbb303797 100644 --- a/llvm/test/Transforms/InstSimplify/add_vp.ll +++ b/llvm/test/Transforms/InstSimplify/add_vp.ll @@ -10,7 +10,7 @@ declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) ; Constant folding should just work. define <2 x i32> @constant_vp_add(<2 x i1> %mask, i32 %evl) { ; CHECK-LABEL: @constant_vp_add( -; CHECK-NEXT: [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> , <2 x i32> , <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; CHECK-NEXT: [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> splat (i32 3), <2 x i32> splat (i32 7), <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; CHECK-NEXT: ret <2 x i32> [[Q]] ; %Q = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> , <2 x i32> , <2 x i1> %mask, i32 %evl) diff --git a/llvm/test/Transforms/InstSimplify/addsub.ll b/llvm/test/Transforms/InstSimplify/addsub.ll index dcfdb4d2a5404b..aa13b3194c464c 100644 --- a/llvm/test/Transforms/InstSimplify/addsub.ll +++ b/llvm/test/Transforms/InstSimplify/addsub.ll @@ -12,7 +12,7 @@ define i1 @test1(i1 %a) { define <2 x i1> @test2(<2 x i1> %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %b = xor <2 x i1> %a, %res = sub <2 x i1> %a, %b diff --git a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll index 2e75a0d2c98abf..3f1672d66abf0d 100644 --- a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll +++ b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll @@ -68,11 +68,8 @@ define <4 x i32> @test8(<1 x i64> %y) { } define <4 x i32> @test9(<1 x i64> %y) { -; CONSTVEC-LABEL: @test9( -; CONSTVEC-NEXT: ret <4 x i32> -; -; CONSTSPLAT-LABEL: @test9( -; CONSTSPLAT-NEXT: ret <4 x i32> splat (i32 -1) +; CHECK-LABEL: @test9( +; CHECK-NEXT: ret <4 x i32> splat (i32 -1) ; %c = bitcast <2 x i64> to <4 x i32> ret <4 x i32> %c @@ -80,7 +77,7 @@ define <4 x i32> @test9(<1 x i64> %y) { define <1 x i1> @test10() { ; CONSTVEC-LABEL: @test10( -; CONSTVEC-NEXT: [[RET:%.*]] = icmp eq <1 x i64> to i64)>, zeroinitializer +; CONSTVEC-NEXT: [[RET:%.*]] = icmp eq <1 x i64> splat (double 0xFFFFFFFFFFFFFFFF) to i64)>, zeroinitializer ; CONSTVEC-NEXT: ret <1 x i1> [[RET]] ; ; CONSTSPLAT-LABEL: @test10( @@ -93,11 +90,8 @@ define <1 x i1> @test10() { ; from MultiSource/Benchmarks/Bullet define <2 x float> @foo() { -; CONSTVEC-LABEL: @foo( -; CONSTVEC-NEXT: ret <2 x float> -; -; CONSTSPLAT-LABEL: @foo( -; CONSTSPLAT-NEXT: ret <2 x float> splat (float 0xFFFFFFFFE0000000) +; CHECK-LABEL: @foo( +; CHECK-NEXT: ret <2 x float> splat (float 0xFFFFFFFFE0000000) ; %cast = bitcast i64 -1 to <2 x float> ret <2 x float> %cast @@ -105,22 +99,16 @@ define <2 x float> @foo() { define <2 x double> @foo2() { -; CONSTVEC-LABEL: @foo2( -; CONSTVEC-NEXT: ret <2 x double> -; -; CONSTSPLAT-LABEL: @foo2( -; CONSTSPLAT-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) +; CHECK-LABEL: @foo2( +; CHECK-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) ; %cast = bitcast i128 -1 to <2 x double> ret <2 x double> %cast } define <1 x float> @foo3() { -; CONSTVEC-LABEL: @foo3( -; CONSTVEC-NEXT: ret <1 x float> -; -; CONSTSPLAT-LABEL: @foo3( -; CONSTSPLAT-NEXT: ret <1 x float> splat (float 0xFFFFFFFFE0000000) +; CHECK-LABEL: @foo3( +; CHECK-NEXT: ret <1 x float> splat (float 0xFFFFFFFFE0000000) ; %cast = bitcast i32 -1 to <1 x float> ret <1 x float> %cast @@ -143,11 +131,8 @@ define double @foo5() { } define <2 x double> @foo6() { -; CONSTVEC-LABEL: @foo6( -; CONSTVEC-NEXT: ret <2 x double> -; -; CONSTSPLAT-LABEL: @foo6( -; CONSTSPLAT-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) +; CHECK-LABEL: @foo6( +; CHECK-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) ; %cast = bitcast <4 x i32> to <2 x double> ret <2 x double> %cast @@ -299,7 +284,7 @@ define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() { define <1 x i32> @bitcast_constexpr_scalar_fp_to_vector_int() { ; CONSTVEC-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int( -; CONSTVEC-NEXT: ret <1 x i32> +; CONSTVEC-NEXT: ret <1 x i32> splat (i32 1065353216) ; ; CONSTSPLAT-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int( ; CONSTSPLAT-NEXT: ret <1 x i32> bitcast (<1 x float> splat (float 1.000000e+00) to <1 x i32>) diff --git a/llvm/test/Transforms/InstSimplify/bitreverse-fold.ll b/llvm/test/Transforms/InstSimplify/bitreverse-fold.ll index d872a5bbbccfd4..ba42e0f21d0777 100644 --- a/llvm/test/Transforms/InstSimplify/bitreverse-fold.ll +++ b/llvm/test/Transforms/InstSimplify/bitreverse-fold.ll @@ -73,7 +73,7 @@ define <2 x i1> @reverse_false_v2i1() { } ; CHECK-LABEL: @reverse_true_v2i1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) define <2 x i1> @reverse_true_v2i1() { %x = call <2 x i1> @llvm.bitreverse.v2i1(<2 x i1> ) ret <2 x i1> %x diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index c6f6b65f89dc2b..67d5c4dbfb2e7d 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -982,7 +982,7 @@ define <2 x i8> @fshr_zero_vec(<2 x i8> %shamt) { define <2 x i7> @fshl_ones_vec(<2 x i7> %shamt) { ; CHECK-LABEL: @fshl_ones_vec( -; CHECK-NEXT: ret <2 x i7> +; CHECK-NEXT: ret <2 x i7> splat (i7 -1) ; %r = call <2 x i7> @llvm.fshl.v2i7(<2 x i7> , <2 x i7> , <2 x i7> %shamt) ret <2 x i7> %r @@ -1421,7 +1421,7 @@ define i32 @ctpop_pow2(i32 %x) { define <3 x i33> @ctpop_signbit(<3 x i33> %x) { ; CHECK-LABEL: @ctpop_signbit( -; CHECK-NEXT: [[B:%.*]] = lshr <3 x i33> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = lshr <3 x i33> [[X:%.*]], splat (i33 32) ; CHECK-NEXT: ret <3 x i33> [[B]] ; %b = lshr <3 x i33> %x, @@ -1433,7 +1433,7 @@ define <3 x i33> @ctpop_signbit(<3 x i33> %x) { define <3 x i33> @ctpop_notsignbit(<3 x i33> %x) { ; CHECK-LABEL: @ctpop_notsignbit( -; CHECK-NEXT: [[B:%.*]] = lshr <3 x i33> [[X:%.*]], +; CHECK-NEXT: [[B:%.*]] = lshr <3 x i33> [[X:%.*]], splat (i33 31) ; CHECK-NEXT: [[R:%.*]] = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> [[B]]) ; CHECK-NEXT: ret <3 x i33> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/canonicalize.ll b/llvm/test/Transforms/InstSimplify/canonicalize.ll index c3877ec0cef8d1..9d2bdd1b853e61 100644 --- a/llvm/test/Transforms/InstSimplify/canonicalize.ll +++ b/llvm/test/Transforms/InstSimplify/canonicalize.ll @@ -27,7 +27,7 @@ define <2 x float> @canonicalize_zero_vector() { define <2 x float> @canonicalize_negzero_vector() { ; CHECK-LABEL: @canonicalize_negzero_vector( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float -0.000000e+00) ; %ret = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> ) ret <2 x float> %ret diff --git a/llvm/test/Transforms/InstSimplify/cast-unsigned-icmp-cmp-0.ll b/llvm/test/Transforms/InstSimplify/cast-unsigned-icmp-cmp-0.ll index 8f9c75bae1097d..20bdbb91ba8575 100644 --- a/llvm/test/Transforms/InstSimplify/cast-unsigned-icmp-cmp-0.ll +++ b/llvm/test/Transforms/InstSimplify/cast-unsigned-icmp-cmp-0.ll @@ -49,7 +49,7 @@ define i1 @i32_cast_cmp_sgt_int_m1_uitofp_float(i32 %i) { define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_float_vec(<2 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_float_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %f = uitofp <2 x i32> %i to <2 x float> %b = bitcast <2 x float> %f to <2 x i32> @@ -72,7 +72,7 @@ define i1 @i32_cast_cmp_sgt_int_m1_uitofp_float_vec_mismatch(<2 x i32> %i) { define <3 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_float_vec_poison(<3 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_float_vec_poison( -; CHECK-NEXT: ret <3 x i1> +; CHECK-NEXT: ret <3 x i1> splat (i1 true) ; %f = uitofp <3 x i32> %i to <3 x float> %b = bitcast <3 x float> %f to <3 x i32> @@ -122,7 +122,7 @@ define i1 @i32_cast_cmp_sgt_int_m1_uitofp_double(i32 %i) { define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_double_vec(<2 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_double_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %f = uitofp <2 x i32> %i to <2 x double> %b = bitcast <2 x double> %f to <2 x i64> @@ -132,7 +132,7 @@ define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_double_vec(<2 x i32> %i) { define <3 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_double_vec_poison(<3 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_double_vec_poison( -; CHECK-NEXT: ret <3 x i1> +; CHECK-NEXT: ret <3 x i1> splat (i1 true) ; %f = uitofp <3 x i32> %i to <3 x double> %b = bitcast <3 x double> %f to <3 x i64> @@ -182,7 +182,7 @@ define i1 @i32_cast_cmp_sgt_int_m1_uitofp_half(i32 %i) { define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_half_vec(<2 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_half_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %f = uitofp <2 x i32> %i to <2 x half> %b = bitcast <2 x half> %f to <2 x i16> @@ -192,7 +192,7 @@ define <2 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_half_vec(<2 x i32> %i) { define <3 x i1> @i32_cast_cmp_sgt_int_m1_uitofp_half_vec_poison(<3 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_uitofp_half_vec_poison( -; CHECK-NEXT: ret <3 x i1> +; CHECK-NEXT: ret <3 x i1> splat (i1 true) ; %f = uitofp <3 x i32> %i to <3 x half> %b = bitcast <3 x half> %f to <3 x i16> diff --git a/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll b/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll index 169ecc3c2d37e3..f9cad7e4345051 100644 --- a/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll +++ b/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll @@ -5,7 +5,7 @@ define <2 x i1> @i32cmp_eq_fixed_zero() { ; CHECK-LABEL: @i32cmp_eq_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp eq <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -21,7 +21,7 @@ define @i32cmp_eq_scalable_zero() { define <2 x i1> @i32cmp_eq_fixed_one() { ; CHECK-LABEL: @i32cmp_eq_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp eq <2 x i32> , ret <2 x i1> %res @@ -101,7 +101,7 @@ define @i32cmp_ugt_scalable_one() { define <2 x i1> @i32cmp_uge_fixed_zero() { ; CHECK-LABEL: @i32cmp_uge_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp uge <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -117,7 +117,7 @@ define @i32cmp_uge_scalable_zero() { define <2 x i1> @i32cmp_uge_fixed_one() { ; CHECK-LABEL: @i32cmp_uge_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp uge <2 x i32> , ret <2 x i1> %res @@ -165,7 +165,7 @@ define @i32cmp_ult_scalable_one() { define <2 x i1> @i32cmp_ule_fixed_zero() { ; CHECK-LABEL: @i32cmp_ule_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp ule <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -181,7 +181,7 @@ define @i32cmp_ule_scalable_zero() { define <2 x i1> @i32cmp_ule_fixed_one() { ; CHECK-LABEL: @i32cmp_ule_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp ule <2 x i32> , ret <2 x i1> %res @@ -229,7 +229,7 @@ define @i32cmp_sgt_scalable_one() { define <2 x i1> @i32cmp_sge_fixed_zero() { ; CHECK-LABEL: @i32cmp_sge_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp sge <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -245,7 +245,7 @@ define @i32cmp_sge_scalable_zero() { define <2 x i1> @i32cmp_sge_fixed_one() { ; CHECK-LABEL: @i32cmp_sge_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp sge <2 x i32> , ret <2 x i1> %res @@ -293,7 +293,7 @@ define @i32cmp_slt_scalable_one() { define <2 x i1> @i32cmp_sle_fixed_zero() { ; CHECK-LABEL: @i32cmp_sle_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp sle <2 x i32> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -309,7 +309,7 @@ define @i32cmp_sle_scalable_zero() { define <2 x i1> @i32cmp_sle_fixed_one() { ; CHECK-LABEL: @i32cmp_sle_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = icmp sle <2 x i32> , ret <2 x i1> %res @@ -357,7 +357,7 @@ define @floatcmp_false_scalable_one() { define <2 x i1> @floatcmp_oeq_fixed_zero() { ; CHECK-LABEL: @floatcmp_oeq_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp oeq <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -373,7 +373,7 @@ define @floatcmp_oeq_scalable_zero() { define <2 x i1> @floatcmp_oeq_fixed_one() { ; CHECK-LABEL: @floatcmp_oeq_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp oeq <2 x float> , ret <2 x i1> %res @@ -421,7 +421,7 @@ define @floatcmp_ogt_scalable_one() { define <2 x i1> @floatcmp_oge_fixed_zero() { ; CHECK-LABEL: @floatcmp_oge_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp oge <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -437,7 +437,7 @@ define @floatcmp_oge_scalable_zero() { define <2 x i1> @floatcmp_oge_fixed_one() { ; CHECK-LABEL: @floatcmp_oge_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp oge <2 x float> , ret <2 x i1> %res @@ -485,7 +485,7 @@ define @floatcmp_olt_scalable_one() { define <2 x i1> @floatcmp_ole_fixed_zero() { ; CHECK-LABEL: @floatcmp_ole_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ole <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -501,7 +501,7 @@ define @floatcmp_ole_scalable_zero() { define <2 x i1> @floatcmp_ole_fixed_one() { ; CHECK-LABEL: @floatcmp_ole_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ole <2 x float> , ret <2 x i1> %res @@ -549,7 +549,7 @@ define @floatcmp_one_scalable_one() { define <2 x i1> @floatcmp_ord_fixed_zero() { ; CHECK-LABEL: @floatcmp_ord_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ord <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -565,7 +565,7 @@ define @floatcmp_ord_scalable_zero() { define <2 x i1> @floatcmp_ord_fixed_one() { ; CHECK-LABEL: @floatcmp_ord_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ord <2 x float> , ret <2 x i1> %res @@ -581,7 +581,7 @@ define @floatcmp_ord_scalable_one() { define <2 x i1> @floatcmp_ueq_fixed_zero() { ; CHECK-LABEL: @floatcmp_ueq_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ueq <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -597,7 +597,7 @@ define @floatcmp_ueq_scalable_zero() { define <2 x i1> @floatcmp_ueq_fixed_one() { ; CHECK-LABEL: @floatcmp_ueq_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ueq <2 x float> , ret <2 x i1> %res @@ -645,7 +645,7 @@ define @floatcmp_ugt_scalable_one() { define <2 x i1> @floatcmp_uge_fixed_zero() { ; CHECK-LABEL: @floatcmp_uge_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp uge <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -661,7 +661,7 @@ define @floatcmp_uge_scalable_zero() { define <2 x i1> @floatcmp_uge_fixed_one() { ; CHECK-LABEL: @floatcmp_uge_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp uge <2 x float> , ret <2 x i1> %res @@ -709,7 +709,7 @@ define @floatcmp_ult_scalable_one() { define <2 x i1> @floatcmp_ule_fixed_zero() { ; CHECK-LABEL: @floatcmp_ule_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ule <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -725,7 +725,7 @@ define @floatcmp_ule_scalable_zero() { define <2 x i1> @floatcmp_ule_fixed_one() { ; CHECK-LABEL: @floatcmp_ule_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp ule <2 x float> , ret <2 x i1> %res @@ -805,7 +805,7 @@ define @floatcmp_uno_scalable_one() { define <2 x i1> @floatcmp_true_fixed_zero() { ; CHECK-LABEL: @floatcmp_true_fixed_zero( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp true <2 x float> zeroinitializer, zeroinitializer ret <2 x i1> %res @@ -821,7 +821,7 @@ define @floatcmp_true_scalable_zero() { define <2 x i1> @floatcmp_true_fixed_one() { ; CHECK-LABEL: @floatcmp_true_fixed_one( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %res = fcmp true <2 x float> , ret <2 x i1> %res diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll index 8077f5ad5c6737..21653d800dce2d 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -1287,7 +1287,7 @@ define i1 @mul3(i32 %X, i32 %Y) { define <2 x i1> @mul3v(<2 x i32> %X, <2 x i32> %Y) { ; CHECK-LABEL: @mul3v( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %XX = mul nsw <2 x i32> %X, %X %YY = mul nsw <2 x i32> %Y, %Y @@ -1465,7 +1465,7 @@ define i1 @compare_always_true_slt(i16 %a) { define <2 x i1> @compare_always_true_slt_splat(<2 x i16> %a) { ; CHECK-LABEL: @compare_always_true_slt_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %t1 = zext <2 x i16> %a to <2 x i32> %t2 = sub <2 x i32> zeroinitializer, %t1 @@ -1485,7 +1485,7 @@ define i1 @compare_always_true_sle(i16 %a) { define <2 x i1> @compare_always_true_sle_splat(<2 x i16> %a) { ; CHECK-LABEL: @compare_always_true_sle_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %t1 = zext <2 x i16> %a to <2 x i32> %t2 = sub <2 x i32> zeroinitializer, %t1 @@ -1565,7 +1565,7 @@ define i1 @compare_always_true_ne(i16 %a) { define <2 x i1> @compare_always_true_ne_splat(<2 x i16> %a) { ; CHECK-LABEL: @compare_always_true_ne_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %t1 = zext <2 x i16> %a to <2 x i32> %t2 = sub <2 x i32> zeroinitializer, %t1 @@ -1725,7 +1725,7 @@ define i1 @icmp_ne_const(i32 %a) { define <2 x i1> @icmp_ne_const_vec(<2 x i32> %a) { ; CHECK-LABEL: @icmp_ne_const_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %b = mul nsw <2 x i32> %a, %c = icmp ne <2 x i32> %b, @@ -1851,7 +1851,7 @@ define i1 @icmp_shl_1_V_ule_2147483648(i32 %V) { define <2 x i1> @icmp_shl_1_ule_signmask(<2 x i8> %V) { ; CHECK-LABEL: @icmp_shl_1_ule_signmask( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %shl = shl <2 x i8> , %V %cmp = icmp ule <2 x i8> %shl, @@ -1860,7 +1860,7 @@ define <2 x i1> @icmp_shl_1_ule_signmask(<2 x i8> %V) { define <2 x i1> @icmp_shl_1_ule_signmask_poison(<2 x i8> %V) { ; CHECK-LABEL: @icmp_shl_1_ule_signmask_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %shl = shl <2 x i8> , %V %cmp = icmp ule <2 x i8> %shl, @@ -1869,7 +1869,7 @@ define <2 x i1> @icmp_shl_1_ule_signmask_poison(<2 x i8> %V) { define <2 x i1> @icmp_shl_1_ule_signmask_poison2(<2 x i8> %V) { ; CHECK-LABEL: @icmp_shl_1_ule_signmask_poison2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %shl = shl <2 x i8> , %V %cmp = icmp ule <2 x i8> %shl, @@ -1914,7 +1914,7 @@ define i1 @shl_1_cmp_ne_nonpow2(i32 %x) { define <2 x i1> @shl_1_cmp_ne_nonpow2_splat(<2 x i32> %x) { ; CHECK-LABEL: @shl_1_cmp_ne_nonpow2_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %s = shl <2 x i32> , %x %c = icmp ne <2 x i32> %s, @@ -1923,7 +1923,7 @@ define <2 x i1> @shl_1_cmp_ne_nonpow2_splat(<2 x i32> %x) { define <2 x i1> @shl_1_cmp_ne_nonpow2_splat_poison(<2 x i32> %x) { ; CHECK-LABEL: @shl_1_cmp_ne_nonpow2_splat_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %s = shl <2 x i32> , %x %c = icmp ne <2 x i32> %s, @@ -1941,7 +1941,7 @@ define i1 @shl_pow2_cmp_eq_nonpow2(i32 %x) { define <2 x i1> @shl_pow21_cmp_ne_nonpow2_splat_poison(<2 x i32> %x) { ; CHECK-LABEL: @shl_pow21_cmp_ne_nonpow2_splat_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %s = shl <2 x i32> , %x %c = icmp ne <2 x i32> %s, @@ -1965,7 +1965,7 @@ define i1 @shl_pow2_cmp_ne_zero(i32 %x) { define <2 x i1> @shl_pow2_cmp_ne_zero_splat(<2 x i32> %x) { ; CHECK-LABEL: @shl_pow2_cmp_ne_zero_splat( -; CHECK-NEXT: [[S:%.*]] = shl <2 x i32> , [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = shl <2 x i32> splat (i32 16), [[X:%.*]] ; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[S]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[C]] ; @@ -1985,7 +1985,7 @@ define i1 @shl_pow2_cmp_eq_zero_nuw(i32 %x) { define <2 x i1> @shl_pow2_cmp_ne_zero_nuw_splat_poison(<2 x i32> %x) { ; CHECK-LABEL: @shl_pow2_cmp_ne_zero_nuw_splat_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %s = shl nuw <2 x i32> , %x %c = icmp ne <2 x i32> %s, @@ -2730,7 +2730,7 @@ define i1 @icmp_nsw_i64(i64 %V) { define <4 x i1> @icmp_nsw_vec(<4 x i32> %V) { ; CHECK-LABEL: @icmp_nsw_vec( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %add5 = add <4 x i32> %V, %add6 = add nsw <4 x i32> %V, @@ -2937,7 +2937,7 @@ define i1 @ctpop_ne_big_bitwidth(i73 %x) { define <2 x i1> @ctpop_slt_bitwidth_plus1_splat(<2 x i13> %x) { ; CHECK-LABEL: @ctpop_slt_bitwidth_plus1_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x) %cmp = icmp slt <2 x i13> %pop, @@ -2949,7 +2949,7 @@ define <2 x i1> @ctpop_slt_bitwidth_plus1_splat(<2 x i13> %x) { define <2 x i1> @ctpop_slt_bitwidth_splat(<2 x i13> %x) { ; CHECK-LABEL: @ctpop_slt_bitwidth_splat( ; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> [[X:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], splat (i13 13) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = call <2 x i13> @llvm.ctpop.v2i13(<2 x i13> %x) @@ -3030,7 +3030,7 @@ define i1 @ctlz_ne_big_bitwidth(i73 %x) { define <2 x i1> @ctlz_slt_bitwidth_plus1_splat(<2 x i13> %x) { ; CHECK-LABEL: @ctlz_slt_bitwidth_plus1_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %pop = call <2 x i13> @llvm.ctlz.v2i13(<2 x i13> %x) %cmp = icmp slt <2 x i13> %pop, @@ -3042,7 +3042,7 @@ define <2 x i1> @ctlz_slt_bitwidth_plus1_splat(<2 x i13> %x) { define <2 x i1> @ctlz_slt_bitwidth_splat(<2 x i13> %x) { ; CHECK-LABEL: @ctlz_slt_bitwidth_splat( ; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.ctlz.v2i13(<2 x i13> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], splat (i13 13) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = call <2 x i13> @llvm.ctlz.v2i13(<2 x i13> %x) @@ -3123,7 +3123,7 @@ define i1 @cttz_ne_big_bitwidth(i73 %x) { define <2 x i1> @cttz_slt_bitwidth_plus1_splat(<2 x i13> %x) { ; CHECK-LABEL: @cttz_slt_bitwidth_plus1_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %pop = call <2 x i13> @llvm.cttz.v2i13(<2 x i13> %x) %cmp = icmp slt <2 x i13> %pop, @@ -3135,7 +3135,7 @@ define <2 x i1> @cttz_slt_bitwidth_plus1_splat(<2 x i13> %x) { define <2 x i1> @cttz_slt_bitwidth_splat(<2 x i13> %x) { ; CHECK-LABEL: @cttz_slt_bitwidth_splat( ; CHECK-NEXT: [[POP:%.*]] = call <2 x i13> @llvm.cttz.v2i13(<2 x i13> [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i13> [[POP]], splat (i13 13) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %pop = call <2 x i13> @llvm.cttz.v2i13(<2 x i13> %x) diff --git a/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll b/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll index 92d6cc30d6248e..34fa05856b22db 100644 --- a/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll +++ b/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll @@ -57,7 +57,7 @@ define i8 @knownbits_allones(i8 %x, i8 %y) { define <2 x i8> @add_vec(<2 x i8> %x) { ; CHECK-LABEL: @add_vec( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %ret = add nuw <2 x i8> %x, ret <2 x i8> %ret diff --git a/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll b/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll index 3f4a08807a4b41..1ea9f7fb8e46d0 100644 --- a/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll +++ b/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll @@ -72,7 +72,7 @@ define i8 @knownbits_negativeorzero(i8 %x, i8 %y) { define <2 x i8> @shl_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_vec( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %ret = shl nuw <2 x i8> , %x ret <2 x i8> %ret diff --git a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll index 48cc8895aebbca..92e43654806a4b 100644 --- a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll +++ b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll @@ -121,7 +121,7 @@ define i64 @ctpop_x_and_negx_nz(i64 %x) { define <2 x i32> @ctpop_shl1_vec(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_shl1_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %shl = shl <2 x i32> , %x %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %shl) @@ -141,7 +141,7 @@ define <2 x i32> @ctpop_shl2_1_vec(<2 x i32> %x) { define <2 x i32> @ctpop_lshr_intmin_vec(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_lshr_intmin_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 1) ; %shr = lshr <2 x i32> , %x %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %shr) @@ -162,7 +162,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec(<2 x i32> %x) { define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_lshr_intmin_intmin_plus1_vec_nz( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> , [[X1]] ; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]]) ; CHECK-NEXT: ret <2 x i32> [[CNT]] @@ -176,7 +176,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) { define <2 x i32> @ctpop_shl2_1_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_shl2_1_vec_nz( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 15) ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[AND]] ; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL]]) ; CHECK-NEXT: ret <2 x i32> [[CNT]] @@ -202,7 +202,7 @@ define <2 x i32> @ctpop_x_and_negx_vec(<2 x i32> %x) { define <2 x i32> @ctpop_x_and_negx_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_x_and_negx_vec_nz( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[SUB]], [[X]] ; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]]) diff --git a/llvm/test/Transforms/InstSimplify/div.ll b/llvm/test/Transforms/InstSimplify/div.ll index e2bc121aee4571..6566fa0292a252 100644 --- a/llvm/test/Transforms/InstSimplify/div.ll +++ b/llvm/test/Transforms/InstSimplify/div.ll @@ -402,8 +402,8 @@ define <2 x i8> @udiv_exact_trailing_zeros(<2 x i8> %x) { define <2 x i8> @udiv_exact_trailing_zeros_eq(<2 x i8> %x) { ; CHECK-LABEL: @udiv_exact_trailing_zeros_eq( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = udiv exact <2 x i8> [[O]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 28) +; CHECK-NEXT: [[R:%.*]] = udiv exact <2 x i8> [[O]], splat (i8 12) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %o = or <2 x i8> %x, @@ -428,7 +428,7 @@ define i8 @udiv_trailing_zeros(i8 %x) { define <2 x i8> @udiv_exact_trailing_zeros_nonuniform_vector(<2 x i8> %x) { ; CHECK-LABEL: @udiv_exact_trailing_zeros_nonuniform_vector( -; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 3) ; CHECK-NEXT: [[R:%.*]] = udiv exact <2 x i8> [[O]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/exp10.ll b/llvm/test/Transforms/InstSimplify/exp10.ll index 11617ceec666d0..a546bb1255d854 100644 --- a/llvm/test/Transforms/InstSimplify/exp10.ll +++ b/llvm/test/Transforms/InstSimplify/exp10.ll @@ -9,8 +9,8 @@ declare @llvm.exp10.nxv2f32() define float @exp10_exp10(float %x) { -; CHECK-LABEL: define float @exp10_exp10 -; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-LABEL: define float @exp10_exp10( +; CHECK-SAME: float [[X:%.*]]) { ; CHECK-NEXT: [[EXP100:%.*]] = call float @llvm.exp10.f32(float [[X]]) ; CHECK-NEXT: [[EXP101:%.*]] = call float @llvm.exp10.f32(float [[EXP100]]) ; CHECK-NEXT: ret float [[EXP101]] @@ -21,8 +21,8 @@ define float @exp10_exp10(float %x) { } define <2 x float> @exp10_exp10_vector(<2 x float> %x) { -; CHECK-LABEL: define <2 x float> @exp10_exp10_vector -; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-LABEL: define <2 x float> @exp10_exp10_vector( +; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: [[EXP100:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[X]]) ; CHECK-NEXT: [[EXP101:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[EXP100]]) ; CHECK-NEXT: ret <2 x float> [[EXP101]] @@ -33,8 +33,8 @@ define <2 x float> @exp10_exp10_vector(<2 x float> %x) { } define float @exp10_exp10_const(float %x) { -; CHECK-LABEL: define float @exp10_exp10_const -; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-LABEL: define float @exp10_exp10_const( +; CHECK-SAME: float [[X:%.*]]) { ; CHECK-NEXT: [[EXP101:%.*]] = call float @llvm.exp10.f32(float 0x7FF0000000000000) ; CHECK-NEXT: ret float [[EXP101]] ; @@ -44,8 +44,8 @@ define float @exp10_exp10_const(float %x) { } define @exp10_exp10_scalable_vector( %x) { -; CHECK-LABEL: define @exp10_exp10_scalable_vector -; CHECK-SAME: ( [[X:%.*]]) { +; CHECK-LABEL: define @exp10_exp10_scalable_vector( +; CHECK-SAME: [[X:%.*]]) { ; CHECK-NEXT: [[EXP100:%.*]] = call @llvm.exp10.nxv2f32( [[X]]) ; CHECK-NEXT: [[EXP101:%.*]] = call @llvm.exp10.nxv2f32( [[EXP100]]) ; CHECK-NEXT: ret [[EXP101]] @@ -101,7 +101,7 @@ define <2 x float> @exp10_undef_vector() { define <2 x float> @exp10_zero_vector() { ; CHECK-LABEL: define <2 x float> @exp10_zero_vector() { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> zeroinitializer) ret <2 x float> %ret @@ -118,7 +118,7 @@ define @exp10_zero_scalable_vector() { define <2 x float> @exp10_zero_negzero_vector() { ; CHECK-LABEL: define <2 x float> @exp10_zero_negzero_vector() { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) ret <2 x float> %ret @@ -262,7 +262,7 @@ define ppc_fp128 @canonicalize_noncanonical_zero_1_ppcf128() { define <2 x float> @exp10_splat_4() { ; CHECK-LABEL: define <2 x float> @exp10_splat_4() { -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+04) ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) ret <2 x float> %ret @@ -270,7 +270,7 @@ define <2 x float> @exp10_splat_4() { define <2 x float> @exp10_splat_qnan() { ; CHECK-LABEL: define <2 x float> @exp10_splat_qnan() { -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> splat (float 0x7FF8000000000000)) ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) @@ -279,7 +279,7 @@ define <2 x float> @exp10_splat_qnan() { define <2 x float> @exp10_splat_inf() { ; CHECK-LABEL: define <2 x float> @exp10_splat_inf() { -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> splat (float 0x7FF0000000000000)) ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) @@ -288,7 +288,7 @@ define <2 x float> @exp10_splat_inf() { define <2 x float> @exp10_splat_neginf() { ; CHECK-LABEL: define <2 x float> @exp10_splat_neginf() { -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> splat (float 0xFFF0000000000000)) ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> ) diff --git a/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll b/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll index b1d772890aff83..963953ad2b3bc6 100644 --- a/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll +++ b/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll @@ -80,7 +80,7 @@ define float @fadd_unary_fnegx(float %x) #0 { define <2 x float> @fadd_binary_fnegx_commute_vec(<2 x float> %x) #0 { ; CHECK-LABEL: @fadd_binary_fnegx_commute_vec( -; CHECK-NEXT: [[NEGX:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> , <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[NEGX:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[X]], <2 x float> [[NEGX]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -415,7 +415,7 @@ define double @frem_negzero_by_x(double %x) #0 { define <2 x double> @frem_negzero_by_x_vec_poison(<2 x double> %x) #0 { ; CHECK-LABEL: @frem_negzero_by_x_vec_poison( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double -0.000000e+00) ; %r = call nnan <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double> , <2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x double> %r diff --git a/llvm/test/Transforms/InstSimplify/fast-math.ll b/llvm/test/Transforms/InstSimplify/fast-math.ll index 287f30b162f804..61bb5b976b98bb 100644 --- a/llvm/test/Transforms/InstSimplify/fast-math.ll +++ b/llvm/test/Transforms/InstSimplify/fast-math.ll @@ -397,7 +397,7 @@ define double @frem_negzero_by_x(double %x) { define <2 x double> @frem_negzero_by_x_vec_poison(<2 x double> %x) { ; CHECK-LABEL: @frem_negzero_by_x_vec_poison( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double -0.000000e+00) ; %r = frem nnan <2 x double> , %x ret <2 x double> %r @@ -469,7 +469,7 @@ define float @fdiv_neg_swapped2(float %f) { define <2 x float> @fdiv_neg_vec_poison_elt(<2 x float> %f) { ; CHECK-LABEL: @fdiv_neg_vec_poison_elt( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float -1.000000e+00) ; %neg = fsub <2 x float> , %f %div = fdiv nnan <2 x float> %f, %neg diff --git a/llvm/test/Transforms/InstSimplify/fdiv.ll b/llvm/test/Transforms/InstSimplify/fdiv.ll index fb59011b91d5bd..91f9b556d66250 100644 --- a/llvm/test/Transforms/InstSimplify/fdiv.ll +++ b/llvm/test/Transforms/InstSimplify/fdiv.ll @@ -148,7 +148,7 @@ define <2 x float> @fdiv_nnan_nsz_ninf_by_zero_v2f32(<2 x float> %x) { define <2 x float> @fdiv_nnan_nsz_ninf_by_negzero_v2f32(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_negzero_v2f32( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], splat (float -0.000000e+00) ; CHECK-NEXT: ret <2 x float> [[FDIV]] ; %fdiv = fdiv nnan nsz <2 x float> %x, diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll index 32ea4cb7cd198d..9a078a8f569dab 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll @@ -15,7 +15,7 @@ define float @fsub_-0_x(float %a) #0 { define <2 x float> @fsub_-0_x_vec(<2 x float> %a) #0 { ; CHECK-LABEL: @fsub_-0_x_vec( -; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> , <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: [[RET:%.*]] = fneg <2 x float> [[T1]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; @@ -68,8 +68,8 @@ define float @fneg_x(float %a) #0 { define <2 x float> @fsub_-0_-0_x_vec(<2 x float> %a) #0 { ; CHECK-LABEL: @fsub_-0_-0_x_vec( -; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> , <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> , <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[RET]] ; %t1 = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll index 7a35f09f03b995..d3178a103d42cd 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll @@ -221,7 +221,7 @@ define float @src_mul_nzero_neg(float nofpclass(inf nan pzero psub pnorm) %f) { define <2 x float> @src_mul_zero_neg(<2 x float> nofpclass(inf nan pzero psub pnorm) %f) { ; CHECK-LABEL: @src_mul_zero_neg( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float -0.000000e+00) ; %r = fmul <2 x float> , %f ret <2 x float> %r @@ -372,7 +372,7 @@ define float @fabs_select_positive_constants(i32 %c) { define <2 x float> @fabs_select_positive_constants_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_positive_constants_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 1.000000e+00), <2 x float> splat (float 2.000000e+00) ; CHECK-NEXT: ret <2 x float> [[SELECT]] ; %cmp = icmp eq i32 %c, 0 @@ -397,7 +397,7 @@ define float @fabs_select_constant_variable(i32 %c, float %x) { define <2 x float> @fabs_select_constant_variable_vector(i32 %c, <2 x float> %x) { ; CHECK-LABEL: @fabs_select_constant_variable_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> [[X:%.*]] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 1.000000e+00), <2 x float> [[X:%.*]] ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -423,7 +423,7 @@ define float @fabs_select_neg0_pos0(i32 %c) { define <2 x float> @fabs_select_neg0_pos0_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_neg0_pos0_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> zeroinitializer +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float -0.000000e+00), <2 x float> zeroinitializer ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -449,7 +449,7 @@ define float @fabs_select_neg0_neg1(i32 %c) { define <2 x float> @fabs_select_neg0_neg1_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_neg0_neg1_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float -0.000000e+00), <2 x float> splat (float -1.000000e+00) ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -474,7 +474,7 @@ define float @fabs_select_nan_nan(i32 %c) { define <2 x float> @fabs_select_nan_nan_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_nan_nan_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0x7FF8000000000000), <2 x float> splat (float 0x7FF8000100000000) ; CHECK-NEXT: ret <2 x float> [[SELECT]] ; %cmp = icmp eq i32 %c, 0 @@ -499,7 +499,7 @@ define float @fabs_select_negnan_nan(i32 %c) { define <2 x float> @fabs_select_negnan_nan_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_negnan_nan_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> splat (float 0x7FF8000000000000) ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -525,7 +525,7 @@ define float @fabs_select_negnan_negnan(i32 %c) { define <2 x float> @fabs_select_negnan_negnan_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_negnan_negnan_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> splat (float 0x7FF8000100000000) ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -551,7 +551,7 @@ define float @fabs_select_negnan_negzero(i32 %c) { define <2 x float> @fabs_select_negnan_negzero_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_negnan_negzero_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> splat (float -0.000000e+00) ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -577,7 +577,7 @@ define float @fabs_select_negnan_zero(i32 %c) { define <2 x float> @fabs_select_negnan_zero_vector(i32 %c) { ; CHECK-LABEL: @fabs_select_negnan_zero_vector( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> zeroinitializer +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> zeroinitializer ; CHECK-NEXT: [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]]) ; CHECK-NEXT: ret <2 x float> [[FABS]] ; @@ -651,7 +651,7 @@ define float @fabs_sqrt_nnan_fabs(float %a) { define float @fabs_select_positive_constants_vector_extract(i32 %c) { ; CHECK-LABEL: @fabs_select_positive_constants_vector_extract( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> , <2 x float> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 1.000000e+00), <2 x float> splat (float 2.000000e+00) ; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[SELECT]], i32 0 ; CHECK-NEXT: ret float [[EXTRACT]] ; diff --git a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll index e9d5c353cbccfc..45e329bca0b08f 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll @@ -285,7 +285,7 @@ define i1 @UIToFP_is_nan_or_positive_or_zero(i32 %x) { define <2 x i1> @UIToFP_is_nan_or_positive_or_zero_vec(<2 x i32> %x) { ; CHECK-LABEL: @UIToFP_is_nan_or_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a = uitofp <2 x i32> %x to <2 x float> %r = fcmp uge <2 x float> %a, zeroinitializer @@ -303,7 +303,7 @@ define i1 @UIToFP_is_positive_or_zero(i32 %x) { define <2 x i1> @UIToFP_is_positive_or_zero_vec(<2 x i32> %x) { ; CHECK-LABEL: @UIToFP_is_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a = uitofp <2 x i32> %x to <2 x float> %r = fcmp oge <2 x float> %a, zeroinitializer @@ -321,7 +321,7 @@ define i1 @UIToFP_nnan_is_positive_or_zero(i32 %x) { define <2 x i1> @UIToFP_nnan_is_positive_or_zero_vec(<2 x i32> %x) { ; CHECK-LABEL: @UIToFP_nnan_is_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %a = uitofp <2 x i32> %x to <2 x float> %r = fcmp nnan oge <2 x float> %a, zeroinitializer @@ -395,7 +395,7 @@ define i1 @fabs_is_nan_or_positive_or_zero(double %x) { define <2 x i1> @fabs_is_nan_or_positive_or_zero_vec(<2 x double> %x) { ; CHECK-LABEL: @fabs_is_nan_or_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %fabs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) %cmp = fcmp uge <2 x double> %fabs, zeroinitializer @@ -413,7 +413,7 @@ define i1 @fabs_nnan_is_positive_or_zero(double %x) { define <2 x i1> @fabs_nnan_is_positive_or_zero_vec(<2 x double> %x) { ; CHECK-LABEL: @fabs_nnan_is_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %fabs = tail call nnan <2 x double> @llvm.fabs.v2f64(<2 x double> %x) %cmp = fcmp oge <2 x double> %fabs, zeroinitializer @@ -459,7 +459,7 @@ define i1 @fabs_fcmp_olt0_-assume-nnan_is_positive_or_zero(double %x) { define <2 x i1> @fabs_fcmp-nnan_is_positive_or_zero_vec(<2 x double> %x) { ; CHECK-LABEL: @fabs_fcmp-nnan_is_positive_or_zero_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %fabs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) %cmp = fcmp nnan oge <2 x double> %fabs, zeroinitializer @@ -826,7 +826,7 @@ define <2 x i1> @minnum_uge_small_min_constant(<2 x float> %x) { define <2 x i1> @minnum_olt_small_min_constant(<2 x float> %x) { ; CHECK-LABEL: @minnum_olt_small_min_constant( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %min = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> ) %cmp = fcmp olt <2 x float> %min, @@ -999,7 +999,7 @@ define <2 x i1> @maxnum_ule_large_max_constant(<2 x float> %x) { define <2 x i1> @maxnum_ogt_large_max_constant(<2 x float> %x) { ; CHECK-LABEL: @maxnum_ogt_large_max_constant( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %max = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> ) %cmp = fcmp ogt <2 x float> %max, @@ -1307,7 +1307,7 @@ define i1 @assumed_positive_une_with_negative_constant(float %a) { define <2 x i1> @known_positive_uge_with_negative_constant_splat_vec(<2 x float> %a) { ; CHECK-LABEL: @known_positive_uge_with_negative_constant_splat_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %call = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) %cmp = fcmp uge <2 x float> %call, @@ -1325,7 +1325,7 @@ define i1 @known_positive_oeq_with_negative_constant(half %a) { define <2 x i1> @known_positive_une_with_negative_constant_splat_vec(<2 x i32> %a) { ; CHECK-LABEL: @known_positive_une_with_negative_constant_splat_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %call = uitofp <2 x i32> %a to <2 x half> %cmp = fcmp une <2 x half> %call, @@ -1377,7 +1377,7 @@ define <2 x i1> @orderedCompareWithNaNVector_poison_elt(<2 x double> %A) { define <2 x i1> @unorderedCompareWithNaNVector_poison_elt(<2 x double> %A) { ; CHECK-LABEL: @unorderedCompareWithNaNVector_poison_elt( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = fcmp ult <2 x double> %A, ret <2 x i1> %cmp @@ -1486,7 +1486,7 @@ define i1 @is_finite_or_nan(i1 %c, double %x) { define <2 x i1> @is_finite_or_nan_commute(<2 x i8> %x) { ; CHECK-LABEL: @is_finite_or_nan_commute( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cast = uitofp <2 x i8> %x to <2 x float> %r = fcmp une <2 x float> , %cast @@ -1533,7 +1533,7 @@ define i1 @is_finite_assume(i1 %c, double %x) { define <2 x i1> @is_finite_commute(<2 x i8> %x) { ; CHECK-LABEL: @is_finite_commute( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cast = uitofp <2 x i8> %x to <2 x float> %r = fcmp one <2 x float> , %cast diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll index 668a93ddf5a426..fff6cfd8a3b4bc 100644 --- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll +++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll @@ -196,7 +196,7 @@ define <2 x float> @test_minnum_const_inf_nnan_comm_vec(<2 x float> %x) { define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: @test_maxnum_const_inf_nnan_comm_vec( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 0x7FF0000000000000) ; %r = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %r @@ -204,7 +204,7 @@ define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x float> %x) { define <2 x float> @test_maximum_const_inf_nnan_comm_vec(<2 x float> %x) { ; CHECK-LABEL: @test_maximum_const_inf_nnan_comm_vec( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 0x7FF0000000000000) ; %r = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> , <2 x float> %x) ret <2 x float> %r @@ -727,7 +727,7 @@ define float @minnum_neginf(float %x) { define <2 x double> @minnum_neginf_commute_vec(<2 x double> %x) { ; CHECK-LABEL: @minnum_neginf_commute_vec( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0xFFF0000000000000) ; %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) ret <2 x double> %r @@ -813,7 +813,7 @@ define float @maxnum_x_y_maxnum_z(float %x, float %y, float %z) { define <2 x double> @maxnum_inf(<2 x double> %x) { ; CHECK-LABEL: @maxnum_inf( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF0000000000000) ; %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double>) ret <2 x double> %val @@ -915,7 +915,7 @@ define <2 x double> @minimum_nan_op1_vec_partial_poison(<2 x double> %x) { define <2 x double> @minimum_nan_op1_vec(<2 x double> %x) { ; CHECK-LABEL: @minimum_nan_op1_vec( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0x7FF800DEAD00DEAD) ; %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> ) ret <2 x double> %r @@ -1164,7 +1164,7 @@ define float @minimum_neginf(float %x) { define <2 x double> @minimum_neginf_commute_vec(<2 x double> %x) { ; CHECK-LABEL: @minimum_neginf_commute_vec( -; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> splat (double 0xFFF0000000000000), <2 x double> [[X:%.*]]) ; CHECK-NEXT: ret <2 x double> [[R]] ; %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> %x) @@ -1185,7 +1185,7 @@ define float @minimum_inf(float %x) { define <2 x double> @maximum_inf(<2 x double> %x) { ; CHECK-LABEL: @maximum_inf( -; CHECK-NEXT: [[VAL:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> ) +; CHECK-NEXT: [[VAL:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> splat (double 0x7FF0000000000000)) ; CHECK-NEXT: ret <2 x double> [[VAL]] ; %val = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double>) diff --git a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll index 06b23200bafff8..22d01ac8c2ad1a 100644 --- a/llvm/test/Transforms/InstSimplify/fp-nan.ll +++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll @@ -103,7 +103,7 @@ define @fmul_nan_op0_scalable_vec_1( define <2 x float> @fmul_nan_op1(<2 x float> %x) { ; CHECK-LABEL: @fmul_nan_op1( -; CHECK-NEXT: ret <2 x float> +; CHECK-NEXT: ret <2 x float> splat (float 0x7FF8000000000000) ; %r = fmul <2 x float> %x, ret <2 x float> %r @@ -121,7 +121,7 @@ define @fmul_nan_op1_scalable_vec( %x define <2 x double> @fdiv_nan_op0(<2 x double> %x) { ; CHECK-LABEL: @fdiv_nan_op0( -; CHECK-NEXT: ret <2 x double> +; CHECK-NEXT: ret <2 x double> splat (double 0xFFF800000000000F) ; %r = fdiv <2 x double> , %x ret <2 x double> %r diff --git a/llvm/test/Transforms/InstSimplify/fptoi-range.ll b/llvm/test/Transforms/InstSimplify/fptoi-range.ll index 95f2a9d50793c6..f9b28347ca9f94 100644 --- a/llvm/test/Transforms/InstSimplify/fptoi-range.ll +++ b/llvm/test/Transforms/InstSimplify/fptoi-range.ll @@ -157,7 +157,7 @@ define i1 @f16_ui_min2(half %f) { define <2 x i1> @v2f16_si_max(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si_max( ; CHECK-NEXT: [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i32> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i32> [[I]], splat (i32 65504) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptosi <2 x half> %f to <2 x i32> @@ -177,7 +177,7 @@ define <2 x i1> @v2f16_si_max2(<2 x half> %f) { define <2 x i1> @v2f16_si16_max2(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si16_max2( ; CHECK-NEXT: [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i16> -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[I]], splat (i16 -32) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptosi <2 x half> %f to <2 x i16> @@ -187,7 +187,7 @@ define <2 x i1> @v2f16_si16_max2(<2 x half> %f) { define <2 x i1> @v2f16_si_min1(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si_min1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %i = fptosi <2 x half> %f to <2 x i32> %c = icmp sge <2 x i32> %i, @@ -197,7 +197,7 @@ define <2 x i1> @v2f16_si_min1(<2 x half> %f) { define <2 x i1> @v2f16_si16_min1(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si16_min1( ; CHECK-NEXT: [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i16> -; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i16> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i16> [[I]], splat (i16 32) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptosi <2 x half> %f to <2 x i16> @@ -208,7 +208,7 @@ define <2 x i1> @v2f16_si16_min1(<2 x half> %f) { define <2 x i1> @v2f16_si_min2(<2 x half> %f) { ; CHECK-LABEL: @v2f16_si_min2( ; CHECK-NEXT: [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[I]], splat (i32 -65504) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptosi <2 x half> %f to <2 x i32> @@ -219,7 +219,7 @@ define <2 x i1> @v2f16_si_min2(<2 x half> %f) { define <2 x i1> @v2f16_ui_max1(<2 x half> %f) { ; CHECK-LABEL: @v2f16_ui_max1( ; CHECK-NEXT: [[I:%.*]] = fptoui <2 x half> [[F:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i32> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sge <2 x i32> [[I]], splat (i32 65504) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptoui <2 x half> %f to <2 x i32> @@ -239,7 +239,7 @@ define <2 x i1> @v2f16_ui_max2(<2 x half> %f) { define <2 x i1> @v2f16_ui16_max2(<2 x half> %f) { ; CHECK-LABEL: @v2f16_ui16_max2( ; CHECK-NEXT: [[I:%.*]] = fptoui <2 x half> [[F:%.*]] to <2 x i16> -; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[I]], +; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[I]], splat (i16 -32) ; CHECK-NEXT: ret <2 x i1> [[C]] ; %i = fptoui <2 x half> %f to <2 x i16> @@ -249,7 +249,7 @@ define <2 x i1> @v2f16_ui16_max2(<2 x half> %f) { define <2 x i1> @v2f16_ui16_max3(<2 x half> %f) { ; CHECK-LABEL: @v2f16_ui16_max3( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %i = fptoui <2 x half> %f to <2 x i16> %c = icmp ule <2 x i16> %i, @@ -258,7 +258,7 @@ define <2 x i1> @v2f16_ui16_max3(<2 x half> %f) { define <2 x i1> @v2f16_ui_min1(<2 x half> %f) { ; CHECK-LABEL: @v2f16_ui_min1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %i = fptoui <2 x half> %f to <2 x i32> %c = icmp sge <2 x i32> %i, diff --git a/llvm/test/Transforms/InstSimplify/frexp.ll b/llvm/test/Transforms/InstSimplify/frexp.ll index 63fe1dec692933..34cfce92bac43e 100644 --- a/llvm/test/Transforms/InstSimplify/frexp.ll +++ b/llvm/test/Transforms/InstSimplify/frexp.ll @@ -9,8 +9,8 @@ declare { , } @llvm.frexp.nxv2f32.nxv2i32 define { float, i32 } @frexp_frexp(float %x) { -; CHECK-LABEL: define { float, i32 } @frexp_frexp -; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-LABEL: define { float, i32 } @frexp_frexp( +; CHECK-SAME: float [[X:%.*]]) { ; CHECK-NEXT: [[FREXP0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) ; CHECK-NEXT: ret { float, i32 } [[FREXP0]] ; @@ -21,8 +21,8 @@ define { float, i32 } @frexp_frexp(float %x) { } define { <2 x float>, <2 x i32> } @frexp_frexp_vector(<2 x float> %x) { -; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_frexp_vector -; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_frexp_vector( +; CHECK-SAME: <2 x float> [[X:%.*]]) { ; CHECK-NEXT: [[FREXP0:%.*]] = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> [[X]]) ; CHECK-NEXT: ret { <2 x float>, <2 x i32> } [[FREXP0]] ; @@ -33,8 +33,8 @@ define { <2 x float>, <2 x i32> } @frexp_frexp_vector(<2 x float> %x) { } define { float, i32 } @frexp_frexp_const(float %x) { -; CHECK-LABEL: define { float, i32 } @frexp_frexp_const -; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-LABEL: define { float, i32 } @frexp_frexp_const( +; CHECK-SAME: float [[X:%.*]]) { ; CHECK-NEXT: ret { float, i32 } { float 6.562500e-01, i32 0 } ; %frexp0 = call { float, i32 } @llvm.frexp.f32.i32(float 42.0) @@ -44,8 +44,8 @@ define { float, i32 } @frexp_frexp_const(float %x) { } define { , } @frexp_frexp_scalable_vector( %x) { -; CHECK-LABEL: define { , } @frexp_frexp_scalable_vector -; CHECK-SAME: ( [[X:%.*]]) { +; CHECK-LABEL: define { , } @frexp_frexp_scalable_vector( +; CHECK-SAME: [[X:%.*]]) { ; CHECK-NEXT: [[FREXP0:%.*]] = call { , } @llvm.frexp.nxv2f32.nxv2i32( [[X]]) ; CHECK-NEXT: ret { , } [[FREXP0]] ; @@ -252,7 +252,7 @@ define { ppc_fp128, i32} @canonicalize_noncanonical_zero_1_ppcf128() { define { <2 x float>, <2 x i32> } @frexp_splat_4() { ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_4() { -; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> , <2 x i32> } +; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 5.000000e-01), <2 x i32> splat (i32 3) } ; %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> ) ret { <2 x float>, <2 x i32> } %ret @@ -260,7 +260,7 @@ define { <2 x float>, <2 x i32> } @frexp_splat_4() { define { <2 x float>, <2 x i32> } @frexp_splat_qnan() { ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_qnan() { -; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> , <2 x i32> zeroinitializer } +; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 0x7FF8000000000000), <2 x i32> zeroinitializer } ; %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> ) ret { <2 x float>, <2 x i32> } %ret @@ -268,7 +268,7 @@ define { <2 x float>, <2 x i32> } @frexp_splat_qnan() { define { <2 x float>, <2 x i32> } @frexp_splat_inf() { ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_inf() { -; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> , <2 x i32> zeroinitializer } +; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 0x7FF0000000000000), <2 x i32> zeroinitializer } ; %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> ) ret { <2 x float>, <2 x i32> } %ret @@ -276,7 +276,7 @@ define { <2 x float>, <2 x i32> } @frexp_splat_inf() { define { <2 x float>, <2 x i32> } @frexp_splat_neginf() { ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_neginf() { -; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> , <2 x i32> zeroinitializer } +; CHECK-NEXT: ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 0xFFF0000000000000), <2 x i32> zeroinitializer } ; %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> ) ret { <2 x float>, <2 x i32> } %ret diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll index f682fe82ca7986..276707894146e8 100644 --- a/llvm/test/Transforms/InstSimplify/gep.ll +++ b/llvm/test/Transforms/InstSimplify/gep.ll @@ -206,7 +206,7 @@ define ptr @ptr_idx_scalar() { define <2 x ptr> @ptr_idx_vector() { ; CHECK-LABEL: @ptr_idx_vector( -; CHECK-NEXT: ret <2 x ptr> getelementptr (i32, ptr null, <2 x i64> ) +; CHECK-NEXT: ret <2 x ptr> getelementptr (i32, ptr null, <2 x i64> splat (i64 1)) ; %gep = getelementptr i32, ptr null, <2 x i64> ret <2 x ptr> %gep @@ -224,7 +224,7 @@ define <4 x ptr> @ptr_idx_mix_scalar_vector(){ define <4 x ptr> @vector_idx_scalar() { ; CHECK-LABEL: @vector_idx_scalar( -; CHECK-NEXT: ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ) +; CHECK-NEXT: ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> splat (i64 1)) ; %gep = getelementptr i32, <4 x ptr> zeroinitializer, i64 1 ret <4 x ptr> %gep @@ -232,7 +232,7 @@ define <4 x ptr> @vector_idx_scalar() { define <4 x ptr> @vector_idx_vector() { ; CHECK-LABEL: @vector_idx_vector( -; CHECK-NEXT: ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ) +; CHECK-NEXT: ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> splat (i64 1)) ; %gep = getelementptr i32, <4 x ptr> zeroinitializer, <4 x i64> ret <4 x ptr> %gep @@ -370,6 +370,14 @@ define <8 x ptr> @gep_vector_index_op3_poison_constant_index_afterwards(ptr %ptr } define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) { +; CHECK-LABEL: @gep_array_of_scalable_vectors_ptrdiff( +; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds [8 x ], ptr [[PTR:%.*]], i64 4 +; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds [8 x ], ptr [[PTR]], i64 6 +; CHECK-NEXT: [[C1_INT:%.*]] = ptrtoint ptr [[C1]] to i64 +; CHECK-NEXT: [[C2_INT:%.*]] = ptrtoint ptr [[C2]] to i64 +; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[C2_INT]], [[C1_INT]] +; CHECK-NEXT: ret i64 [[DIFF]] +; %c1 = getelementptr inbounds [8 x ], ptr %ptr, i64 4 %c2 = getelementptr inbounds [8 x ], ptr %ptr, i64 6 %c1.int = ptrtoint ptr %c1 to i64 diff --git a/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll index a501f995b6c975..350093dcda82d9 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll @@ -31,7 +31,7 @@ define <2 x i1> @eq_f(<2 x i1> %a) { define <2 x i1> @ne_t(<2 x i1> %a) { ; CHECK-LABEL: @ne_t( -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %r = icmp ne <2 x i1> %a, @@ -72,7 +72,7 @@ define <2 x i1> @ugt_f(<2 x i1> %a) { define <2 x i1> @ult_t(<2 x i1> %a) { ; CHECK-LABEL: @ult_t( -; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %r = icmp ult <2 x i1> %a, @@ -89,7 +89,7 @@ define <2 x i1> @ult_f(<2 x i1> %a) { define <2 x i1> @sgt_t(<2 x i1> %a) { ; CHECK-LABEL: @sgt_t( -; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[R]] ; %r = icmp sgt <2 x i1> %a, @@ -130,7 +130,7 @@ define <2 x i1> @uge_t(<2 x i1> %a) { define <2 x i1> @uge_f(<2 x i1> %a) { ; CHECK-LABEL: @uge_f( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp uge <2 x i1> %a, ret <2 x i1> %r @@ -138,7 +138,7 @@ define <2 x i1> @uge_f(<2 x i1> %a) { define <2 x i1> @ule_t(<2 x i1> %a) { ; CHECK-LABEL: @ule_t( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp ule <2 x i1> %a, ret <2 x i1> %r @@ -155,7 +155,7 @@ define <2 x i1> @ule_f(<2 x i1> %a) { define <2 x i1> @sge_t(<2 x i1> %a) { ; CHECK-LABEL: @sge_t( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp sge <2 x i1> %a, ret <2 x i1> %r @@ -163,7 +163,7 @@ define <2 x i1> @sge_t(<2 x i1> %a) { define <2 x i1> @sge_t_poison_elt(<2 x i1> %a) { ; CHECK-LABEL: @sge_t_poison_elt( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp sge <2 x i1> %a, ret <2 x i1> %r @@ -188,7 +188,7 @@ define <2 x i1> @sle_t(<2 x i1> %a) { define <2 x i1> @sle_f(<2 x i1> %a) { ; CHECK-LABEL: @sle_f( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = icmp sle <2 x i1> %a, ret <2 x i1> %r diff --git a/llvm/test/Transforms/InstSimplify/icmp-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-constant.ll index 4f3a5d3dba3b5c..21244f832cbb94 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-constant.ll @@ -13,7 +13,7 @@ define i1 @tautological_ule(i8 %x) { define <2 x i1> @tautological_ule_vec(<2 x i8> %x) { ; CHECK-LABEL: @tautological_ule_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp ule <2 x i8> %x, ret <2 x i1> %cmp @@ -21,7 +21,7 @@ define <2 x i1> @tautological_ule_vec(<2 x i8> %x) { define <2 x i1> @tautological_ule_vec_partial_poison(<2 x i8> %x) { ; CHECK-LABEL: @tautological_ule_vec_partial_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp = icmp ule <2 x i8> %x, ret <2 x i1> %cmp @@ -63,7 +63,7 @@ define i1 @urem3(i32 %X) { define <2 x i1> @urem3_vec(<2 x i32> %X) { ; CHECK-LABEL: @urem3_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %A = urem <2 x i32> %X, %B = icmp ult <2 x i32> %A, @@ -72,7 +72,7 @@ define <2 x i1> @urem3_vec(<2 x i32> %X) { define <2 x i1> @urem3_vec_partial_poison(<2 x i32> %X) { ; CHECK-LABEL: @urem3_vec_partial_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %A = urem <2 x i32> %X, %B = icmp ult <2 x i32> %A, @@ -138,7 +138,7 @@ define i1 @udiv1(i32 %X) { define <2 x i1> @udiv1_vec(<2 x i32> %X) { ; CHECK-LABEL: @udiv1_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %A = udiv <2 x i32> %X, %B = icmp ult <2 x i32> %A, @@ -177,7 +177,7 @@ define i1 @sdiv1(i32 %X) { define <2 x i1> @sdiv1_vec(<2 x i32> %X) { ; CHECK-LABEL: @sdiv1_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %A = sdiv <2 x i32> %X, %B = icmp slt <2 x i32> %A, @@ -196,7 +196,7 @@ define i1 @shl5(i32 %X) { define <2 x i1> @shl5_vec(<2 x i32> %X) { ; CHECK-LABEL: @shl5_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = shl nuw <2 x i32> , %X %cmp = icmp ugt <2 x i32> %sub, @@ -205,7 +205,7 @@ define <2 x i1> @shl5_vec(<2 x i32> %X) { define <2 x i1> @shl5_vec_partial_poison(<2 x i32> %X) { ; CHECK-LABEL: @shl5_vec_partial_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = shl nuw <2 x i32> , %X %cmp = icmp ugt <2 x i32> %sub, @@ -243,7 +243,7 @@ define i1 @shl4(i32 %X) { define <2 x i1> @shl4_vec(<2 x i32> %X) { ; CHECK-LABEL: @shl4_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = shl nsw <2 x i32> , %X %cmp = icmp sle <2 x i32> %sub, @@ -262,7 +262,7 @@ define i1 @icmp_shl_nsw_1(i64 %a) { define <2 x i1> @icmp_shl_nsw_1_vec(<2 x i64> %a) { ; CHECK-LABEL: @icmp_shl_nsw_1_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %shl = shl nsw <2 x i64> , %a %cmp = icmp sge <2 x i64> %shl, zeroinitializer @@ -605,7 +605,7 @@ define i1 @tautological9(i32 %x) { define <2 x i1> @tautological9_vec(<2 x i32> %x) { ; CHECK-LABEL: @tautological9_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nuw <2 x i32> %x, %cmp = icmp ne <2 x i32> %add, @@ -826,7 +826,7 @@ define i1 @add_nsw_pos_const6(i32 %x) { define <2 x i1> @add_nsw_pos_const5_splat_vec(<2 x i32> %x) { ; CHECK-LABEL: @add_nsw_pos_const5_splat_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %add = add nsw <2 x i32> %x, %cmp = icmp ne <2 x i32> %add, @@ -891,7 +891,7 @@ define i1 @mul_nuw_urem_cmp_constant1(i8 %x) { define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @mul_nuw_urem_cmp_constant_vec_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nuw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -902,7 +902,7 @@ define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat(<2 x i8> %x) { define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat_poison1(<2 x i8> %x) { ; CHECK-LABEL: @mul_nuw_urem_cmp_constant_vec_splat_poison1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nuw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -913,7 +913,7 @@ define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat_poison1(<2 x i8> %x) { define <2 x i1> @mul_nuw_urem_cmp_constant_vec_splat_poison2(<2 x i8> %x) { ; CHECK-LABEL: @mul_nuw_urem_cmp_constant_vec_splat_poison2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nuw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -998,7 +998,7 @@ define i1 @mul_nsw_srem_cmp_constant1(i8 %x) { define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @mul_nsw_srem_cmp_constant_vec_splat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nsw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -1009,7 +1009,7 @@ define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat(<2 x i8> %x) { define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat_poison1(<2 x i8> %x) { ; CHECK-LABEL: @mul_nsw_srem_cmp_constant_vec_splat_poison1( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nsw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -1020,7 +1020,7 @@ define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat_poison1(<2 x i8> %x) { define <2 x i1> @mul_nsw_srem_cmp_constant_vec_splat_poison2(<2 x i8> %x) { ; CHECK-LABEL: @mul_nsw_srem_cmp_constant_vec_splat_poison2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %m = mul nsw <2 x i8> %x, %r = icmp ne <2 x i8> %m, @@ -1215,7 +1215,7 @@ define <2 x i1> @icmp_eq_constant_range_attr_vec(<2 x i8> range(i8 0, 10) %i) { define <2 x i1> @neg_icmp_eq_constant_range_attr_vec(<2 x i8> range(i8 0, 11) %i) { ; CHECK-LABEL: @neg_icmp_eq_constant_range_attr_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I:%.*]], splat (i8 10) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %cmp = icmp eq <2 x i8> %i, @@ -1238,7 +1238,7 @@ define <2 x i1> @icmp_eq_constant_range_return_vec() { define <2 x i1> @neg_icmp_eq_constant_range_return_vec() { ; CHECK-LABEL: @neg_icmp_eq_constant_range_return_vec( ; CHECK-NEXT: [[I:%.*]] = call <2 x i8> @returns_contain_ten_range_helper_vec() -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I]], splat (i8 10) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %i = call <2 x i8> @returns_contain_ten_range_helper_vec() @@ -1261,7 +1261,7 @@ define <2 x i1> @icmp_eq_constant_range_call_vec() { define <2 x i1> @neg_icmp_eq_constant_range_call_vec() { ; CHECK-LABEL: @neg_icmp_eq_constant_range_call_vec( ; CHECK-NEXT: [[I:%.*]] = call range(i8 0, 11) <2 x i8> @returns_i8_helper_vec() -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[I]], splat (i8 10) ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %i = call range(i8 0, 11) <2 x i8> @returns_i8_helper_vec() diff --git a/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll index 045d773bf32841..e8b047d51df301 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll @@ -7,7 +7,7 @@ define <2 x i1> @eq_t_not(<2 x i1> %a) { ; CHECK-LABEL: @eq_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -71,7 +71,7 @@ define <2 x i1> @ne_t_not_poison(<2 x i1> %a) { define <2 x i1> @ne_f_not(<2 x i1> %a) { ; CHECK-LABEL: @ne_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -90,7 +90,7 @@ define <2 x i1> @ugt_t_not(<2 x i1> %a) { define <2 x i1> @ugt_f_not(<2 x i1> %a) { ; CHECK-LABEL: @ugt_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -181,7 +181,7 @@ define <2 x i1> @slt_t_not(<2 x i1> %a) { define <2 x i1> @slt_f_not(<2 x i1> %a) { ; CHECK-LABEL: @slt_f_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -191,7 +191,7 @@ define <2 x i1> @slt_f_not(<2 x i1> %a) { define <2 x i1> @uge_t_not(<2 x i1> %a) { ; CHECK-LABEL: @uge_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -201,7 +201,7 @@ define <2 x i1> @uge_t_not(<2 x i1> %a) { define <2 x i1> @uge_f_not(<2 x i1> %a) { ; CHECK-LABEL: @uge_f_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp uge <2 x i1> %not, @@ -210,7 +210,7 @@ define <2 x i1> @uge_f_not(<2 x i1> %a) { define <2 x i1> @ule_t_not(<2 x i1> %a) { ; CHECK-LABEL: @ule_t_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp ule <2 x i1> %not, @@ -246,7 +246,7 @@ define <2 x i1> @ule_f_not_poison(<2 x i1> %a) { define <2 x i1> @sge_t_not(<2 x i1> %a) { ; CHECK-LABEL: @sge_t_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp sge <2 x i1> %not, @@ -282,7 +282,7 @@ define <2 x i1> @sge_f_not_poison(<2 x i1> %a) { define <2 x i1> @sle_t_not(<2 x i1> %a) { ; CHECK-LABEL: @sle_t_not( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], splat (i1 true) ; CHECK-NEXT: ret <2 x i1> [[NOT]] ; %not = xor <2 x i1> %a, @@ -292,7 +292,7 @@ define <2 x i1> @sle_t_not(<2 x i1> %a) { define <2 x i1> @sle_f_not(<2 x i1> %a) { ; CHECK-LABEL: @sle_f_not( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %not = xor <2 x i1> %a, %r = icmp sle <2 x i1> %not, diff --git a/llvm/test/Transforms/InstSimplify/icmp.ll b/llvm/test/Transforms/InstSimplify/icmp.ll index c94922197096f2..2f2e0d32669d79 100644 --- a/llvm/test/Transforms/InstSimplify/icmp.ll +++ b/llvm/test/Transforms/InstSimplify/icmp.ll @@ -232,7 +232,7 @@ define i1 @sub_swap(i8 %x) { define <2 x i1> @sub_odd(<2 x i8> %x) { ; CHECK-LABEL: @sub_odd( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = sub <2 x i8> , %x %cmp = icmp ne <2 x i8> %sub, %x @@ -241,7 +241,7 @@ define <2 x i1> @sub_odd(<2 x i8> %x) { define <2 x i1> @sub_odd_poison(<2 x i8> %x) { ; CHECK-LABEL: @sub_odd_poison( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sub = sub <2 x i8> , %x %cmp = icmp ne <2 x i8> %sub, %x diff --git a/llvm/test/Transforms/InstSimplify/implies.ll b/llvm/test/Transforms/InstSimplify/implies.ll index 7e3cb656bce158..53d8d79add3016 100644 --- a/llvm/test/Transforms/InstSimplify/implies.ll +++ b/llvm/test/Transforms/InstSimplify/implies.ll @@ -99,7 +99,7 @@ define i1 @test4(i32 %length.i, i32 %i) { ; A ==> A for vectors define <4 x i1> @test5(<4 x i1> %vec) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: ret <4 x i1> +; CHECK-NEXT: ret <4 x i1> splat (i1 true) ; %res = icmp ule <4 x i1> %vec, %vec ret <4 x i1> %res diff --git a/llvm/test/Transforms/InstSimplify/insertelement.ll b/llvm/test/Transforms/InstSimplify/insertelement.ll index 3fe8b8331a40c0..40c075bf1ea667 100644 --- a/llvm/test/Transforms/InstSimplify/insertelement.ll +++ b/llvm/test/Transforms/InstSimplify/insertelement.ll @@ -122,7 +122,7 @@ unreachable_infloop: define <4 x i32> @insert_into_splat(i32 %index) { ; CHECK-LABEL: @insert_into_splat( -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 3) ; %I = insertelement <4 x i32> , i32 3, i32 %index ret <4 x i32> %I diff --git a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll index 4d662c08b1a7a1..af83f00368597f 100644 --- a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll +++ b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll @@ -1123,7 +1123,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_maximum(<4 x double> %x) { define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail(<4 x double> %x) { ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail ; CHECK-SAME: (<4 x double> [[X:%.*]]) { -; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> [[NINF_X]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 ; CHECK-NEXT: ret i1 [[CMP]] @@ -1148,7 +1148,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_minimum(<4 x double> %x) { define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail(<4 x double> %x) { ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail ; CHECK-SAME: (<4 x double> [[X:%.*]]) { -; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[NINF_X]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 ; CHECK-NEXT: ret i1 [[CMP]] @@ -1173,7 +1173,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_fmax(<4 x double> %x) { define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail(<4 x double> %x) { ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail ; CHECK-SAME: (<4 x double> [[X:%.*]]) { -; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[NINF_X]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 ; CHECK-NEXT: ret i1 [[CMP]] @@ -1198,7 +1198,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_fmin(<4 x double> %x) { define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail(<4 x double> %x) { ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail ; CHECK-SAME: (<4 x double> [[X:%.*]]) { -; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00) ; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[NINF_X]]) ; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 ; CHECK-NEXT: ret i1 [[CMP]] diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index 965c333d306d14..1445402d1feb20 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -192,7 +192,7 @@ define <4 x i1> @shuf_nonzero_both(<4 x i8> %xx, <4 x i8> %yy) { define <4 x i1> @shuf_nonzero_both_fail(<4 x i8> %xx, <4 x i8> %yy) { ; CHECK-LABEL: @shuf_nonzero_both_fail( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], splat (i8 1) ; CHECK-NEXT: [[Y:%.*]] = add nuw <4 x i8> [[YY:%.*]], ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer @@ -208,8 +208,8 @@ define <4 x i1> @shuf_nonzero_both_fail(<4 x i8> %xx, <4 x i8> %yy) { define <4 x i1> @shuf_nonzero_both_fail2(<4 x i8> %xx, <4 x i8> %yy) { ; CHECK-LABEL: @shuf_nonzero_both_fail2( -; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], -; CHECK-NEXT: [[Y:%.*]] = add <4 x i8> [[YY:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], splat (i8 1) +; CHECK-NEXT: [[Y:%.*]] = add <4 x i8> [[YY:%.*]], splat (i8 1) ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[R]] @@ -390,7 +390,7 @@ define i1 @nonzero_reduce_or(<2 x i8> %xx) { define i1 @nonzero_reduce_or_fail(<2 x i8> %xx) { ; CHECK-LABEL: @nonzero_reduce_or_fail( -; CHECK-NEXT: [[X:%.*]] = add nsw <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[X:%.*]] = add nsw <2 x i8> [[XX:%.*]], splat (i8 1) ; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> [[X]]) ; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0 ; CHECK-NEXT: ret i1 [[R]] diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index 5c659a1e7086a6..ddd4140b3aeddf 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -99,7 +99,7 @@ define i3 @smin_poison(i3 %x) { define <2 x i8> @umax_undef(<2 x i8> %x) { ; CHECK-LABEL: @umax_undef( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> %x) ret <2 x i8> %r @@ -107,7 +107,7 @@ define <2 x i8> @umax_undef(<2 x i8> %x) { define <2 x i8> @umax_poison(<2 x i8> %x) { ; CHECK-LABEL: @umax_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> poison, <2 x i8> %x) ret <2 x i8> %r @@ -139,7 +139,7 @@ define i8 @smax_maxval(i8 %x) { define <2 x i8> @smax_maxval_commute(<2 x i8> %x) { ; CHECK-LABEL: @smax_maxval_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 127) ; %r = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %x) ret <2 x i8> %r @@ -155,7 +155,7 @@ define i8 @smin_minval(i8 %x) { define <2 x i8> @smin_minval_commute(<2 x i8> %x) { ; CHECK-LABEL: @smin_minval_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -128) ; %r = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> ) ret <2 x i8> %r @@ -171,7 +171,7 @@ define i8 @umax_maxval(i8 %x) { define <2 x i8> @umax_maxval_commute(<2 x i8> %x) { ; CHECK-LABEL: @umax_maxval_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> %x) ret <2 x i8> %r @@ -259,7 +259,7 @@ define <2 x i8> @umin_maxval_commute(<2 x i8> %x) { define <2 x i8> @smax_maxval_partial_poison(<2 x i8> %x) { ; CHECK-LABEL: @smax_maxval_partial_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 127) ; %r = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %x) ret <2 x i8> %r @@ -267,7 +267,7 @@ define <2 x i8> @smax_maxval_partial_poison(<2 x i8> %x) { define <2 x i8> @smin_minval_partial_poison(<2 x i8> %x) { ; CHECK-LABEL: @smin_minval_partial_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -128) ; %r = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> ) ret <2 x i8> %r @@ -275,7 +275,7 @@ define <2 x i8> @smin_minval_partial_poison(<2 x i8> %x) { define <2 x i8> @umax_maxval_partial_poison(<2 x i8> %x) { ; CHECK-LABEL: @umax_maxval_partial_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> %x) ret <2 x i8> %r @@ -709,7 +709,7 @@ define i8 @umax_umax_constants_commute2(i8 %x) { define <2 x i8> @umax_umax_constants_commute3(<2 x i8> %x) { ; CHECK-LABEL: @umax_umax_constants_commute3( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> [[X:%.*]]) +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> splat (i8 -2), <2 x i8> [[X:%.*]]) ; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> %x) @@ -739,7 +739,7 @@ define i8 @umin_umin_constants_commute1(i8 %x) { define <2 x i8> @umin_umin_constants_commute2(<2 x i8> %x) { ; CHECK-LABEL: @umin_umin_constants_commute2( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 127)) ; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> ) @@ -769,7 +769,7 @@ define i8 @smax_smax_constants(i8 %x) { define <2 x i8> @smax_smax_constants_commute1(<2 x i8> %x) { ; CHECK-LABEL: @smax_smax_constants_commute1( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> [[X:%.*]]) +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> splat (i8 7), <2 x i8> [[X:%.*]]) ; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %x) @@ -799,7 +799,7 @@ define i8 @smax_smax_constants_commute3(i8 %x) { define <2 x i8> @smin_smin_constants(<2 x i8> %x) { ; CHECK-LABEL: @smin_smin_constants( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 7)) ; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> ) @@ -842,7 +842,7 @@ define i8 @smin_smin_constants_commute3(i8 %x) { define <2 x i8> @umin_umin_constants_partial_undef(<2 x i8> %x) { ; CHECK-LABEL: @umin_umin_constants_partial_undef( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> , <2 x i8> [[M]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> splat (i8 9), <2 x i8> [[M]]) ; CHECK-NEXT: ret <2 x i8> [[M2]] ; %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> ) @@ -855,7 +855,7 @@ define <2 x i8> @umin_umin_constants_partial_undef(<2 x i8> %x) { define <2 x i8> @smax_smax_constants_partial_undef(<2 x i8> %x) { ; CHECK-LABEL: @smax_smax_constants_partial_undef( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> [[M]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> splat (i8 9), <2 x i8> [[M]]) ; CHECK-NEXT: ret <2 x i8> [[M2]] ; %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %x, <2 x i8> ) diff --git a/llvm/test/Transforms/InstSimplify/negate.ll b/llvm/test/Transforms/InstSimplify/negate.ll index d07029becd1fe9..95d8f46ac5d249 100644 --- a/llvm/test/Transforms/InstSimplify/negate.ll +++ b/llvm/test/Transforms/InstSimplify/negate.ll @@ -64,7 +64,7 @@ define i8 @negate_zero_or_minsigned(i8 %x) { define <2 x i8> @negate_zero_or_minsigned_vec(<2 x i8> %x) { ; CHECK-LABEL: @negate_zero_or_minsigned_vec( -; CHECK-NEXT: [[SIGNBIT:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[SIGNBIT:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -128) ; CHECK-NEXT: ret <2 x i8> [[SIGNBIT]] ; %signbit = and <2 x i8> %x, diff --git a/llvm/test/Transforms/InstSimplify/or-icmps-same-ops.ll b/llvm/test/Transforms/InstSimplify/or-icmps-same-ops.ll index fb67ba309876fc..440bc7497e0963 100644 --- a/llvm/test/Transforms/InstSimplify/or-icmps-same-ops.ll +++ b/llvm/test/Transforms/InstSimplify/or-icmps-same-ops.ll @@ -1192,7 +1192,7 @@ define i1 @ult_ult(i8 %a, i8 %b) { define <2 x i1> @ult_uge_vec(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @ult_uge_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %cmp1 = icmp ult <2 x i8> %a, %b %cmp2 = icmp uge <2 x i8> %a, %b diff --git a/llvm/test/Transforms/InstSimplify/or.ll b/llvm/test/Transforms/InstSimplify/or.ll index f241c6987b9e70..ae60e2def0797c 100644 --- a/llvm/test/Transforms/InstSimplify/or.ll +++ b/llvm/test/Transforms/InstSimplify/or.ll @@ -19,7 +19,7 @@ define i32 @all_ones(i32 %A) { define <3 x i8> @all_ones_vec_with_poison_elt(<3 x i8> %A) { ; CHECK-LABEL: @all_ones_vec_with_poison_elt( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> splat (i8 -1) ; %B = or <3 x i8> %A, ret <3 x i8> %B @@ -70,7 +70,7 @@ define i32 @or_not(i32 %A) { define <2 x i4> @or_not_commute_vec_poison(<2 x i4> %A) { ; CHECK-LABEL: @or_not_commute_vec_poison( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %NotA = xor <2 x i4> %A, %B = or <2 x i4> %NotA, %A @@ -130,7 +130,7 @@ define i8 @test11(i8 %A) { define i8 @test11v(<2 x i8> %A) { ; CHECK-LABEL: @test11v( ; CHECK-NEXT: [[B:%.*]] = or <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[CV:%.*]] = xor <2 x i8> [[B]], +; CHECK-NEXT: [[CV:%.*]] = xor <2 x i8> [[B]], splat (i8 13) ; CHECK-NEXT: [[C:%.*]] = extractelement <2 x i8> [[CV]], i32 0 ; CHECK-NEXT: [[D:%.*]] = or i8 [[C]], 1 ; CHECK-NEXT: [[E:%.*]] = xor i8 [[D]], 12 @@ -221,7 +221,7 @@ define i117 @test6_apint(i117 %X) { ; replace with V+N. define <2 x i39> @test7_apint(<2 x i39> %V, <2 x i39> %M) { ; CHECK-LABEL: @test7_apint( -; CHECK-NEXT: [[N:%.*]] = and <2 x i39> [[M:%.*]], +; CHECK-NEXT: [[N:%.*]] = and <2 x i39> [[M:%.*]], splat (i39 -274877906944) ; CHECK-NEXT: [[A:%.*]] = add <2 x i39> [[N]], [[V:%.*]] ; CHECK-NEXT: ret <2 x i39> [[A]] ; @@ -241,7 +241,7 @@ define <2 x i39> @test7_apint(<2 x i39> %V, <2 x i39> %M) { ; replace with V+N. define <2 x i399> @test8_apint(<2 x i399> %V, <2 x i399> %M) { ; CHECK-LABEL: @test8_apint( -; CHECK-NEXT: [[N:%.*]] = and <2 x i399> [[M:%.*]], +; CHECK-NEXT: [[N:%.*]] = and <2 x i399> [[M:%.*]], splat (i399 18446742974197923840) ; CHECK-NEXT: [[A:%.*]] = add <2 x i399> [[N]], [[V:%.*]] ; CHECK-NEXT: ret <2 x i399> [[A]] ; @@ -320,7 +320,7 @@ define i8 @or_with_not_op_commute2(i8 %a, i8 %b) { define <3 x i17> @or_with_not_op_commute3(<3 x i17> %a, <3 x i17> %b) { ; CHECK-LABEL: @or_with_not_op_commute3( -; CHECK-NEXT: ret <3 x i17> +; CHECK-NEXT: ret <3 x i17> splat (i17 -1) ; %ab = and <3 x i17> %a, %b %not = xor <3 x i17> %ab, @@ -332,7 +332,7 @@ define <3 x i17> @or_with_not_op_commute3(<3 x i17> %a, <3 x i17> %b) { define <2 x i1> @or_with_not_op_commute4(<2 x i1> %a, <2 x i1> %b) { ; CHECK-LABEL: @or_with_not_op_commute4( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %ab = and <2 x i1> %b, %a %not = xor <2 x i1> %ab, @@ -391,7 +391,7 @@ define i8 @and_or_not_or_commute2(i8 %A, i8 %B) { define <2 x i4> @and_or_not_or_commute3(<2 x i4> %A, <2 x i4> %B) { ; CHECK-LABEL: @and_or_not_or_commute3( -; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], +; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOTA]] ; %nota = xor <2 x i4> %A, @@ -443,7 +443,7 @@ define i8 @and_or_not_or_commute6(i8 %A, i8 %B) { define <2 x i4> @and_or_not_or_commute7(<2 x i4> %A, <2 x i4> %B) { ; CHECK-LABEL: @and_or_not_or_commute7( -; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], +; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOTA]] ; %nota = xor <2 x i4> %A, @@ -503,7 +503,7 @@ define <2 x i4> @and_or_not_or_commute7_undef_elt(<2 x i4> %A, <2 x i4> %B) { ; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], ; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[B:%.*]], [[NOTA]] ; CHECK-NEXT: [[OR:%.*]] = or <2 x i4> [[B]], [[A]] -; CHECK-NEXT: [[NOTAB:%.*]] = xor <2 x i4> [[OR]], +; CHECK-NEXT: [[NOTAB:%.*]] = xor <2 x i4> [[OR]], splat (i4 -1) ; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[NOTAB]], [[AND]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; @@ -608,7 +608,7 @@ define i32 @shifted_all_ones_commute(i32 %shamt) { define <2 x i9> @shifted_all_ones_sub_on_lshr(<2 x i9> %shamt) { ; CHECK-LABEL: @shifted_all_ones_sub_on_lshr( -; CHECK-NEXT: ret <2 x i9> +; CHECK-NEXT: ret <2 x i9> splat (i9 -1) ; %l = shl <2 x i9> , %shamt %s = sub <2 x i9> , %shamt @@ -697,7 +697,7 @@ define i4 @or_nxor_and_commute0(i4 %a, i4 %b) { define <2 x i4> @or_nxor_and_commute1(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_and_commute1( ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i4> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[XOR]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[XOR]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %and = and <2 x i4> %a, %b @@ -723,7 +723,7 @@ define i74 @or_nxor_and_commute2(i74 %a, i74 %b) { define <2 x i4> @or_nxor_and_commute3(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_and_commute3( ; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i4> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[XOR]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[XOR]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %and = and <2 x i4> %b, %a @@ -814,7 +814,7 @@ define i4 @or_nxor_or_commute0(i4 %a, i4 %b) { define <2 x i4> @or_nxor_or_commute1(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_or_commute1( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %or = or <2 x i4> %a, %b %xor = xor <2 x i4> %a, %b @@ -836,7 +836,7 @@ define i74 @or_nxor_or_commute2(i74 %a, i74 %b) { define <2 x i4> @or_nxor_or_commute3(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_or_commute3( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %or = or <2 x i4> %b, %a %xor = xor <2 x i4> %a, %b @@ -883,7 +883,7 @@ define i4 @or_nxor_or_wrong_val2(i4 %a, i4 %b, i4 %c) { define <2 x i4> @or_nxor_or_poison_elt(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_nxor_or_poison_elt( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %or = or <2 x i4> %b, %a %xor = xor <2 x i4> %a, %b @@ -998,7 +998,7 @@ define i32 @or_xor_not_op_or_commute7(i32 %a, i32 %b){ define <2 x i4> @or_xor_not_op_or_poison_elt(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @or_xor_not_op_or_poison_elt( -; CHECK-NEXT: ret <2 x i4> +; CHECK-NEXT: ret <2 x i4> splat (i4 -1) ; %xor = xor <2 x i4> %a, %b %nota = xor <2 x i4> %a, @@ -1042,7 +1042,7 @@ define i4 @or_nand_xor(i4 %x, i4 %y) { define <2 x i4> @or_nand_xor_commute1(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @or_nand_xor_commute1( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[AND]], +; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[AND]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NAND]] ; %and = and <2 x i4> %y, %x diff --git a/llvm/test/Transforms/InstSimplify/pr28725.ll b/llvm/test/Transforms/InstSimplify/pr28725.ll index 81884c9270ac2a..9de5bed6aa8cee 100644 --- a/llvm/test/Transforms/InstSimplify/pr28725.ll +++ b/llvm/test/Transforms/InstSimplify/pr28725.ll @@ -6,7 +6,7 @@ define <2 x i16> @test1() { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 extractelement (<2 x i16> bitcast (<1 x i32> to <2 x i16>), i32 0), 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 extractelement (<2 x i16> bitcast (<1 x i32> splat (i32 1) to <2 x i16>), i32 0), 0 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], [[S:%.*]] zeroinitializer, [[S]] { i16 0, i32 1 } ; CHECK-NEXT: [[E:%.*]] = extractvalue [[S]] [[SEL]], 0 ; CHECK-NEXT: [[B:%.*]] = insertelement <2 x i16> , i16 [[E]], i32 0 diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll index d2c4a5dd7f0353..5e7c636d623180 100644 --- a/llvm/test/Transforms/InstSimplify/ptrmask.ll +++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll @@ -170,7 +170,7 @@ define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) { define <2 x ptr> @ptrmask_simplify_aligned_unused_vec(<2 x ptr> align 128 %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_aligned_unused_vec ; CHECK-SAME: (<2 x ptr> align 128 [[P:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64)) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -212,8 +212,8 @@ define ptr @ptrmask_simplify_known_unused(ptr %p) { define <2 x ptr> @ptrmask_simplify_known_unused_vec(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { -; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64)) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) ; CHECK-NEXT: ret <2 x ptr> [[PGEP]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -225,8 +225,8 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec(<2 x ptr> %p) { define <2 x ptr> @ptrmask_simplify_known_unused_vec2(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec2 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { -; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64)) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) ; CHECK-NEXT: ret <2 x ptr> [[PGEP]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -239,7 +239,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec3(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec3 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { ; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) ; CHECK-NEXT: ret <2 x ptr> [[PGEP]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -266,7 +266,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail(<2 x ptr> %p) { ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { ; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) ; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> -; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> ) +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> splat (i64 -32)) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) @@ -278,8 +278,8 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail(<2 x ptr> %p) { define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { -; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> +; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64)) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) ; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> ) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; @@ -293,8 +293,8 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { ; CHECK-NEXT: [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> ) -; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> -; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> ) +; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32) +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> splat (i64 -32)) ; CHECK-NEXT: ret <2 x ptr> [[R]] ; %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> ) diff --git a/llvm/test/Transforms/InstSimplify/rem.ll b/llvm/test/Transforms/InstSimplify/rem.ll index 5ec803c6d0481e..4fb1d9167ab7b3 100644 --- a/llvm/test/Transforms/InstSimplify/rem.ll +++ b/llvm/test/Transforms/InstSimplify/rem.ll @@ -505,7 +505,7 @@ define <2 x i8> @simplfy_srem_of_mul(<2 x i8> %x) { define <2 x i8> @simplfy_srem_of_mul_fail_bad_mod(<2 x i8> %x) { ; CHECK-LABEL: @simplfy_srem_of_mul_fail_bad_mod( ; CHECK-NEXT: [[MUL:%.*]] = mul nsw <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = srem <2 x i8> [[MUL]], +; CHECK-NEXT: [[R:%.*]] = srem <2 x i8> [[MUL]], splat (i8 5) ; CHECK-NEXT: ret <2 x i8> [[R]] ; %mul = mul nsw <2 x i8> %x, diff --git a/llvm/test/Transforms/InstSimplify/returned.ll b/llvm/test/Transforms/InstSimplify/returned.ll index 2da1052e7d4d89..cff12c5ea406c9 100644 --- a/llvm/test/Transforms/InstSimplify/returned.ll +++ b/llvm/test/Transforms/InstSimplify/returned.ll @@ -38,7 +38,7 @@ define <8 x i1> @returned_vec_arg_casted(<2 x i32> %a) { define <8 x i1> @returned_vec_arg_casted2(<2 x i32> %a) { ; CHECK-LABEL: @returned_vec_arg_casted2( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 1) ; CHECK-NEXT: [[X:%.*]] = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> [[OR]]) ; CHECK-NEXT: [[C:%.*]] = icmp ne <8 x i8> [[X]], zeroinitializer ; CHECK-NEXT: ret <8 x i1> [[C]] diff --git a/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll b/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll index ea139f2411cc3d..25a35062e60b8d 100644 --- a/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll @@ -58,7 +58,7 @@ define i8 @uadd_scalar_maxval(i8 %a) { define <2 x i9> @uadd_vector_maxval(<2 x i9> %a) { ; CHECK-LABEL: @uadd_vector_maxval( -; CHECK-NEXT: ret <2 x i9> +; CHECK-NEXT: ret <2 x i9> splat (i9 -1) ; %x3v = call <2 x i9> @llvm.uadd.sat.v2i9(<2 x i9> %a, <2 x i9> ) ret <2 x i9> %x3v @@ -74,7 +74,7 @@ define i3 @uadd_scalar_maxval_commute(i3 %a) { define <2 x i8> @uadd_vector_maxval_commute(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_maxval_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x4v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> , <2 x i8> %a) ret <2 x i8> %x4v @@ -98,7 +98,7 @@ define i8 @uadd_scalar_poison(i8 %a) { define <2 x i8> @uadd_vector_undef(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_undef( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) ret <2 x i8> %x5v @@ -106,7 +106,7 @@ define <2 x i8> @uadd_vector_undef(<2 x i8> %a) { define <2 x i8> @uadd_vector_poison(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) ret <2 x i8> %x5v @@ -130,7 +130,7 @@ define i8 @uadd_scalar_poison_commute(i8 %a) { define <2 x i8> @uadd_vector_undef_commute(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_undef_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> %a) ret <2 x i8> %x5v @@ -138,7 +138,7 @@ define <2 x i8> @uadd_vector_undef_commute(<2 x i8> %a) { define <2 x i8> @uadd_vector_poison_commute(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_poison_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> %a) ret <2 x i8> %x5v @@ -187,7 +187,7 @@ define i8 @sadd_scalar_maxval(i8 %a) { define <2 x i8> @sadd_vector_maxval(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_maxval( -; CHECK-NEXT: [[Y3V:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[Y3V:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 127)) ; CHECK-NEXT: ret <2 x i8> [[Y3V]] ; %y3v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -230,7 +230,7 @@ define i8 @sadd_scalar_poison(i8 %a) { define <2 x i8> @sadd_vector_undef(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_undef( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y5v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> undef) ret <2 x i8> %y5v @@ -238,7 +238,7 @@ define <2 x i8> @sadd_vector_undef(<2 x i8> %a) { define <2 x i8> @sadd_vector_poison(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_poison( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y5v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> poison) ret <2 x i8> %y5v @@ -262,7 +262,7 @@ define i8 @sadd_scalar_poison_commute(i8 %a) { define <2 x i8> @sadd_vector_undef_commute(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_undef_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y6v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> %a) ret <2 x i8> %y6v @@ -270,7 +270,7 @@ define <2 x i8> @sadd_vector_undef_commute(<2 x i8> %a) { define <2 x i8> @sadd_vector_poison_commute(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_poison_commute( -; CHECK-NEXT: ret <2 x i8> +; CHECK-NEXT: ret <2 x i8> splat (i8 -1) ; %y6v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> %a) ret <2 x i8> %y6v @@ -449,7 +449,7 @@ define i8 @ssub_scalar_maxval(i8 %a) { define <2 x i8> @ssub_vector_maxval(<2 x i8> %a) { ; CHECK-LABEL: @ssub_vector_maxval( -; CHECK-NEXT: [[Y3V:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[Y3V:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 127)) ; CHECK-NEXT: ret <2 x i8> [[Y3V]] ; %y3v = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> ) @@ -833,7 +833,7 @@ define i1 @uadd_ult(i8 %x, i8 %y) { define <2 x i1> @uadd_uge_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @uadd_uge_vec( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %sat = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y) %cmp = icmp uge <2 x i8> %sat, %x diff --git a/llvm/test/Transforms/InstSimplify/sdiv.ll b/llvm/test/Transforms/InstSimplify/sdiv.ll index 99092802cab025..ec93e4d2fb6cbb 100644 --- a/llvm/test/Transforms/InstSimplify/sdiv.ll +++ b/llvm/test/Transforms/InstSimplify/sdiv.ll @@ -12,7 +12,7 @@ define i32 @negated_operand(i32 %x) { define <2 x i32> @negated_operand_commute_vec(<2 x i32> %x) { ; CHECK-LABEL: @negated_operand_commute_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 -1) ; %negx = sub nsw <2 x i32> zeroinitializer, %x %div = sdiv <2 x i32> %negx, %x @@ -31,7 +31,7 @@ define i32 @knownnegation(i32 %x, i32 %y) { define <2 x i32> @knownnegation_commute_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @knownnegation_commute_vec( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 -1) ; %xy = sub nsw <2 x i32> %x, %y %yx = sub nsw <2 x i32> %y, %x @@ -160,7 +160,7 @@ define <2 x i32> @knownnegation_commute_vec_bad3(<2 x i32> %x, <2 x i32> %y) { define <3 x i32> @negated_operand_vec_poison(<3 x i32> %x) { ; CHECK-LABEL: @negated_operand_vec_poison( -; CHECK-NEXT: ret <3 x i32> +; CHECK-NEXT: ret <3 x i32> splat (i32 -1) ; %negx = sub nsw <3 x i32> , %x %div = sdiv <3 x i32> %negx, %x diff --git a/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll index 33670128af439c..6d48afc4013515 100644 --- a/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll @@ -59,7 +59,7 @@ define <2 x i32> @equal_arms_vec(<2 x i1> %cond, <2 x i32> %x) { define <2 x i32> @equal_arms_vec_poison(<2 x i1> %cond) { ; CHECK-LABEL: @equal_arms_vec_poison( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 42) ; %V = select <2 x i1> %cond, <2 x i32> , <2 x i32> ret <2 x i32> %V @@ -265,7 +265,7 @@ define i32 @test11(i32 %X) { define <2 x i8> @test11vec(<2 x i8> %X) { ; CHECK-LABEL: @test11vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i8> [[AND]] ; %cmp = icmp sgt <2 x i8> %X, @@ -658,8 +658,8 @@ define i1 @and_cmps(i32 %x) { define <2 x i1> @and_cmps_vector(<2 x i32> %x) { ; CHECK-LABEL: @and_cmps_vector( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 92) +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], splat (i32 11) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP1]], <2 x i1> [[CMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/select-logical.ll b/llvm/test/Transforms/InstSimplify/select-logical.ll index 1c11bb8982eb11..6b5cef7b8e26ce 100644 --- a/llvm/test/Transforms/InstSimplify/select-logical.ll +++ b/llvm/test/Transforms/InstSimplify/select-logical.ll @@ -238,7 +238,7 @@ define <3 x i1> @logical_or_not_and_vector1(<3 x i1> %x, <3 x i1> %y) { define <3 x i1> @logical_or_not_and_vector1_poison1(<3 x i1> %x, <3 x i1> %y) { ; CHECK-LABEL: @logical_or_not_and_vector1_poison1( ; CHECK-NEXT: [[L_AND:%.*]] = select <3 x i1> [[X:%.*]], <3 x i1> , <3 x i1> [[Y:%.*]] -; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i1> [[L_AND]], +; CHECK-NEXT: [[NOT:%.*]] = xor <3 x i1> [[L_AND]], splat (i1 true) ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[NOT]], <3 x i1> [[X]], <3 x i1> zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; @@ -299,7 +299,7 @@ define i1 @logical_nand_logical_or_common_op_commute1(i1 %x, i1 %y) { define <2 x i1> @logical_nand_logical_or_common_op_commute2(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @logical_nand_logical_or_common_op_commute2( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %and = select <2 x i1> %y, <2 x i1> %x, <2 x i1> zeroinitializer %nand = xor <2 x i1> %and, @@ -309,7 +309,7 @@ define <2 x i1> @logical_nand_logical_or_common_op_commute2(<2 x i1> %x, <2 x i1 define <2 x i1> @logical_nand_logical_or_common_op_commute3(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @logical_nand_logical_or_common_op_commute3( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %and = select <2 x i1> %x, <2 x i1> %y, <2 x i1> zeroinitializer %nand = xor <2 x i1> %and, @@ -332,8 +332,8 @@ define i1 @logical_nand_logical_or_common_op_commute4(i1 %x, i1 %y) { define <2 x i1> @logical_nand_logical_or_common_op_commute4_poison_vec(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @logical_nand_logical_or_common_op_commute4_poison_vec( ; CHECK-NEXT: [[AND:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i1> [[X:%.*]], <2 x i1> -; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i1> [[AND]], -; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[X]], <2 x i1> , <2 x i1> [[NAND]] +; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i1> [[AND]], splat (i1 true) +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[X]], <2 x i1> splat (i1 true), <2 x i1> [[NAND]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %and = select <2 x i1> %y, <2 x i1> %x, <2 x i1> @@ -666,7 +666,7 @@ define <2 x i1> @or_same_op(<2 x i1> %x) { define <2 x i1> @always_true_same_op(<2 x i1> %x) { ; CHECK-LABEL: @always_true_same_op( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %r = select <2 x i1> %x, <2 x i1> %x, <2 x i1> ret <2 x i1> %r @@ -714,7 +714,7 @@ define <2 x i1> @or_and_common_op_commute2(<2 x i1> %x, <2 x i1> %y) { define <2 x i1> @or_and_common_op_commute2_poison(<2 x i1> %x, <2 x i1> %y) { ; CHECK-LABEL: @or_and_common_op_commute2_poison( ; CHECK-NEXT: [[A:%.*]] = select <2 x i1> [[X:%.*]], <2 x i1> [[Y:%.*]], <2 x i1> -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y]], <2 x i1> , <2 x i1> [[A]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[Y]], <2 x i1> splat (i1 true), <2 x i1> [[A]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %a = select <2 x i1> %x, <2 x i1> %y, <2 x i1> diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index 4618e1143dc8f9..5f160aa1658616 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -67,7 +67,7 @@ define <2 x i32> @equal_arms_vec(<2 x i1> %cond, <2 x i32> %x) { define <2 x i32> @equal_arms_vec_poison(<2 x i1> %cond) { ; CHECK-LABEL: @equal_arms_vec_poison( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> splat (i32 42) ; %V = select <2 x i1> %cond, <2 x i32> , <2 x i32> ret <2 x i32> %V @@ -319,7 +319,7 @@ define i32 @test11(i32 %X) { define <2 x i8> @test11vec(<2 x i8> %X) { ; CHECK-LABEL: @test11vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 127) ; CHECK-NEXT: ret <2 x i8> [[AND]] ; %cmp = icmp sgt <2 x i8> %X, @@ -712,8 +712,8 @@ define i1 @and_cmps(i32 %x) { define <2 x i1> @and_cmps_vector(<2 x i32> %x) { ; CHECK-LABEL: @and_cmps_vector( -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 92) +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], splat (i32 11) ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP1]], <2 x i1> [[CMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; diff --git a/llvm/test/Transforms/InstSimplify/select_or_and.ll b/llvm/test/Transforms/InstSimplify/select_or_and.ll index 6bb7fbf5dc41d4..41b3353df57268 100644 --- a/llvm/test/Transforms/InstSimplify/select_or_and.ll +++ b/llvm/test/Transforms/InstSimplify/select_or_and.ll @@ -204,7 +204,7 @@ define i32 @select_icmp_and_eq_commuted(i32 %a, i32 %b) { ; https://alive2.llvm.org/ce/z/HfYXvx define <2 x i16> @select_icmp_and_eq_vec(<2 x i16> %a, <2 x i16> %b) { ; CHECK-LABEL: @select_icmp_and_eq_vec( -; CHECK-NEXT: ret <2 x i16> +; CHECK-NEXT: ret <2 x i16> splat (i16 -1) ; %and = and <2 x i16> %a, %b %tobool = icmp eq <2 x i16> %and, diff --git a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll index 6bf03779379ec7..3917172e3b752b 100644 --- a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll +++ b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll @@ -264,7 +264,7 @@ define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) { define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @shl_vector_for_real( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[B:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 3) ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[AND]] ; CHECK-NEXT: ret <2 x i32> [[SHL]] ; @@ -318,7 +318,7 @@ define i32 @lshr_cttz_zero_is_undef(i32 %x) { define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec( ; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true) -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true) @@ -342,7 +342,7 @@ define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) { define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec( ; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true) -; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], +; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], splat (i8 3) ; CHECK-NEXT: ret <2 x i8> [[SH]] ; %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true) diff --git a/llvm/test/Transforms/InstSimplify/shift.ll b/llvm/test/Transforms/InstSimplify/shift.ll index a816fcbdeeee00..a6de24e24f63c4 100644 --- a/llvm/test/Transforms/InstSimplify/shift.ll +++ b/llvm/test/Transforms/InstSimplify/shift.ll @@ -115,7 +115,7 @@ define i32 @ashr_all_ones(i32 %A) { define <3 x i8> @ashr_all_ones_vec_with_poison_elts(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @ashr_all_ones_vec_with_poison_elts( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> splat (i8 -1) ; %sh = ashr <3 x i8> , %y ret <3 x i8> %sh @@ -225,9 +225,9 @@ define <2 x i64> @shl_or_shr2v(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @shl_or_shr2v( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64> -; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i64> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = shl nuw <2 x i64> [[T1]], splat (i64 31) ; CHECK-NEXT: [[T4:%.*]] = or <2 x i64> [[T2]], [[T3]] -; CHECK-NEXT: [[T5:%.*]] = lshr <2 x i64> [[T4]], +; CHECK-NEXT: [[T5:%.*]] = lshr <2 x i64> [[T4]], splat (i64 31) ; CHECK-NEXT: ret <2 x i64> [[T5]] ; %t1 = zext <2 x i32> %a to <2 x i64> @@ -297,7 +297,7 @@ define i32 @all_ones_left_right(i32 %x) { define <2 x i7> @all_ones_left_right_splat(<2 x i7> %x) { ; CHECK-LABEL: @all_ones_left_right_splat( -; CHECK-NEXT: ret <2 x i7> +; CHECK-NEXT: ret <2 x i7> splat (i7 -1) ; %left = shl <2 x i7> , %x %right = ashr <2 x i7> %left, %x @@ -319,7 +319,7 @@ define <3 x i7> @all_ones_left_right_splat_undef_elt(<3 x i7> %x) { define <3 x i7> @all_ones_left_right_splat_poison__elt(<3 x i7> %x) { ; CHECK-LABEL: @all_ones_left_right_splat_poison__elt( -; CHECK-NEXT: ret <3 x i7> +; CHECK-NEXT: ret <3 x i7> splat (i7 -1) ; %left = shl <3 x i7> , %x %right = ashr <3 x i7> %left, %x diff --git a/llvm/test/Transforms/InstSimplify/shr-nop.ll b/llvm/test/Transforms/InstSimplify/shr-nop.ll index 90d49ccb312bcb..29fe222faaae3e 100644 --- a/llvm/test/Transforms/InstSimplify/shr-nop.ll +++ b/llvm/test/Transforms/InstSimplify/shr-nop.ll @@ -415,7 +415,7 @@ define <2 x i4097> @ashr_zero_vec(<2 x i4097> %shiftval) { define <2 x i64> @ashr_minus1_vec(<2 x i64> %shiftval) { ; CHECK-LABEL: @ashr_minus1_vec( -; CHECK-NEXT: ret <2 x i64> +; CHECK-NEXT: ret <2 x i64> splat (i64 -1) ; %shr = ashr <2 x i64> , %shiftval ret <2 x i64> %shr diff --git a/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll index 49cacdc1e8d0f6..e7b54947587cd0 100644 --- a/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll @@ -11,7 +11,7 @@ define <4 x i32> @const_folding(<4 x i32> %x) { define <4 x i32> @const_folding1(<4 x i32> %x) { ; CHECK-LABEL: @const_folding1( -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 5) ; %shuf = shufflevector <4 x i32> , <4 x i32> %x, <4 x i32> zeroinitializer ret <4 x i32> %shuf @@ -260,7 +260,7 @@ define <2 x float> @PR32872(<2 x float> %x) { define <5 x i8> @splat_inserted_constant(<4 x i8> %x) { ; CHECK-LABEL: @splat_inserted_constant( -; CHECK-NEXT: ret <5 x i8> +; CHECK-NEXT: ret <5 x i8> splat (i8 42) ; %ins3 = insertelement <4 x i8> %x, i8 42, i64 3 %splat5 = shufflevector <4 x i8> %ins3, <4 x i8> poison, <5 x i32> diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll index 20195051616050..0442bd721974b5 100644 --- a/llvm/test/Transforms/InstSimplify/shufflevector.ll +++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll @@ -11,7 +11,7 @@ define <4 x i32> @const_folding(<4 x i32> %x) { define <4 x i32> @const_folding1(<4 x i32> %x) { ; CHECK-LABEL: @const_folding1( -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 5) ; %shuf = shufflevector <4 x i32> , <4 x i32> %x, <4 x i32> zeroinitializer ret <4 x i32> %shuf @@ -260,7 +260,7 @@ define <2 x float> @PR32872(<2 x float> %x) { define <5 x i8> @splat_inserted_constant(<4 x i8> %x) { ; CHECK-LABEL: @splat_inserted_constant( -; CHECK-NEXT: ret <5 x i8> +; CHECK-NEXT: ret <5 x i8> splat (i8 42) ; %ins3 = insertelement <4 x i8> %x, i8 42, i64 3 %splat5 = shufflevector <4 x i8> %ins3, <4 x i8> undef, <5 x i32> @@ -296,7 +296,7 @@ define <4 x i32> @fold_identity(<4 x i32> %x) { define <4 x i32> @fold_identity2(<4 x i32> %x) { ; CHECK-LABEL: @fold_identity2( -; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], splat (i32 1) ; CHECK-NEXT: ret <4 x i32> [[SHL]] ; %shl = shl <4 x i32> %x, diff --git a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll index 98422706f653bf..d75c00e04c4ebf 100644 --- a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll +++ b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll @@ -36,7 +36,7 @@ define float @fadd_x_n0_ebmaytrap(float %a) #0 { define <2 x float> @fadd_vec_x_n0_ebmaytrap(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_x_n0_ebmaytrap( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 @@ -54,7 +54,7 @@ define float @fadd_x_n0_ebstrict(float %a) #0 { define <2 x float> @fadd_vec_x_n0_ebstrict(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_x_n0_ebstrict( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -72,7 +72,7 @@ define float @fadd_x_n0_neginf(float %a) #0 { define <2 x float> @fadd_vec_x_n0_neginf(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_x_n0_neginf( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -90,7 +90,7 @@ define float @fadd_x_n0_dynamic(float %a) #0 { define <2 x float> @fadd_vec_x_n0_dynamic(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_x_n0_dynamic( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 @@ -144,7 +144,7 @@ define float @fadd_nnan_x_n0_ebstrict(float %a) #0 { define <2 x float> @fadd_vec_nnan_x_n0_ebstrict(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_nnan_x_n0_ebstrict( -; CHECK-NEXT: [[RET:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[A]] ; %ret = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -164,7 +164,7 @@ define float @fadd_ninf_x_n0_ebstrict(float %a) #0 { ; Test with a fast math flag set but that flag is not "nnan". define <2 x float> @fadd_vec_ninf_x_n0_ebstrict(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_ninf_x_n0_ebstrict( -; CHECK-NEXT: [[RET:%.*]] = call ninf <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> , metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call ninf <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call ninf <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float>, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -200,7 +200,7 @@ define float @fadd_n0_x_ebmaytrap(float %a) #0 { ; TODO: Canonicalize the order of the arguments. Then this will fire. define <2 x float> @fadd_vec_n0_x_ebmaytrap(<2 x float> %a) #0 { ; CHECK-LABEL: @fadd_vec_n0_x_ebmaytrap( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> , <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll index 47720060acb528..9b4f8dd7a5a4be 100644 --- a/llvm/test/Transforms/InstSimplify/uscmp.ll +++ b/llvm/test/Transforms/InstSimplify/uscmp.ll @@ -83,7 +83,7 @@ define i8 @ucmp_undef() { define <4 x i8> @ucmp_lt_splat() { ; CHECK-LABEL: define <4 x i8> @ucmp_lt_splat() { -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 -1) ; %1 = call <4 x i8> @llvm.ucmp(<4 x i32> splat(i32 1), <4 x i32> splat(i32 3)) ret <4 x i8> %1 @@ -222,7 +222,7 @@ define i8 @ucmp_with_addition2(i32 %x) { define <4 x i8> @ucmp_with_addition_vec(<4 x i32> %x) { ; CHECK-LABEL: define <4 x i8> @ucmp_with_addition_vec( ; CHECK-SAME: <4 x i32> [[X:%.*]]) { -; CHECK-NEXT: ret <4 x i8> +; CHECK-NEXT: ret <4 x i8> splat (i8 -1) ; %1 = add nuw <4 x i32> %x, splat(i32 1) %2 = call <4 x i8> @llvm.ucmp(<4 x i32> %x, <4 x i32> %1) diff --git a/llvm/test/Transforms/InstSimplify/vec-cmp.ll b/llvm/test/Transforms/InstSimplify/vec-cmp.ll index dce1261d7031c9..b2ae4b82287d5a 100644 --- a/llvm/test/Transforms/InstSimplify/vec-cmp.ll +++ b/llvm/test/Transforms/InstSimplify/vec-cmp.ll @@ -12,7 +12,7 @@ define <2 x i1> @nonzero_vec_splat(<2 x i32> %x) { define <2 x i1> @nonzero_vec_nonsplat(<2 x i32> %x) { ; CHECK-LABEL: @nonzero_vec_nonsplat( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %y = or <2 x i32> %x, %c = icmp ne <2 x i32> %y, zeroinitializer @@ -65,7 +65,7 @@ define <2 x i1> @nonzero_vec_mul_nuw(<2 x i32> %x, <2 x i32> %y) { ; Multiplies of non-zero numbers are non-zero if there is no signed overflow. define <2 x i1> @nonzero_vec_mul_nsw(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @nonzero_vec_mul_nsw( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xnz = or <2 x i32> %x, %ynz = or <2 x i32> %y, diff --git a/llvm/test/Transforms/InstSimplify/vec-icmp-of-cast.ll b/llvm/test/Transforms/InstSimplify/vec-icmp-of-cast.ll index 4acf2fba1934f4..8b27ab1f0ef267 100644 --- a/llvm/test/Transforms/InstSimplify/vec-icmp-of-cast.ll +++ b/llvm/test/Transforms/InstSimplify/vec-icmp-of-cast.ll @@ -63,7 +63,7 @@ define <2 x i1> @icmp_eq_zext_unused(<2 x i8> %x) { define <2 x i1> @icmp_ne_zext_is_true(<2 x i8> %x) { ; CHECK-LABEL: @icmp_ne_zext_is_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xext = zext <2 x i8> %x to <2 x i32> %cmp = icmp ne <2 x i32> %xext, @@ -72,7 +72,7 @@ define <2 x i1> @icmp_ne_zext_is_true(<2 x i8> %x) { define <2 x i1> @icmp_ult_zext_is_true(<2 x i8> %x) { ; CHECK-LABEL: @icmp_ult_zext_is_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xext = zext <2 x i8> %x to <2 x i32> %cmp = icmp ult <2 x i32> %xext, @@ -81,7 +81,7 @@ define <2 x i1> @icmp_ult_zext_is_true(<2 x i8> %x) { define <2 x i1> @icmp_ule_zext_is_true(<2 x i8> %x) { ; CHECK-LABEL: @icmp_ule_zext_is_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xext = zext <2 x i8> %x to <2 x i32> %cmp = icmp ule <2 x i32> %xext, @@ -150,7 +150,7 @@ define <2 x i1> @icmp_eq_sext_fail(<2 x i8> %x) { define <2 x i1> @icmp_ne_sext_is_true(<2 x i8> %x) { ; CHECK-LABEL: @icmp_ne_sext_is_true( -; CHECK-NEXT: ret <2 x i1> +; CHECK-NEXT: ret <2 x i1> splat (i1 true) ; %xext = sext <2 x i8> %x to <2 x i32> %cmp = icmp ne <2 x i32> %xext, diff --git a/llvm/test/Transforms/InstSimplify/vector_gep.ll b/llvm/test/Transforms/InstSimplify/vector_gep.ll index a1d0bd379aa740..c373e15f19e822 100644 --- a/llvm/test/Transforms/InstSimplify/vector_gep.ll +++ b/llvm/test/Transforms/InstSimplify/vector_gep.ll @@ -56,7 +56,7 @@ define <4 x ptr> @test4(<4 x ptr> %a) { define <4 x ptr> @test5() { ; CHECK-LABEL: define <4 x ptr> @test5() { -; CHECK-NEXT: ret <4 x ptr> getelementptr (i8, <4 x ptr> , <4 x i64> ) +; CHECK-NEXT: ret <4 x ptr> getelementptr (i8, <4 x ptr> , <4 x i64> splat (i64 1)) ; %c = inttoptr <4 x i64> to <4 x ptr> %gep = getelementptr i8, <4 x ptr> %c, <4 x i32> diff --git a/llvm/test/Transforms/InstSimplify/xor.ll b/llvm/test/Transforms/InstSimplify/xor.ll index 229e943a3836f2..eaa016b6614e13 100644 --- a/llvm/test/Transforms/InstSimplify/xor.ll +++ b/llvm/test/Transforms/InstSimplify/xor.ll @@ -23,7 +23,7 @@ define i4 @xor_and_or_not_commute0(i4 %a, i4 %b) { define <2 x i4> @xor_and_or_not_commute1(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @xor_and_or_not_commute1( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[A:%.*]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %and = and <2 x i4> %a, %b @@ -47,7 +47,7 @@ define i74 @xor_and_or_not_commute2(i74 %a, i74 %b) { define <2 x i4> @xor_and_or_not_commute3(<2 x i4> %a, <2 x i4> %b) { ; CHECK-LABEL: @xor_and_or_not_commute3( -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[A:%.*]], +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[A:%.*]], splat (i4 -1) ; CHECK-NEXT: ret <2 x i4> [[NOT]] ; %and = and <2 x i4> %b, %a diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll index 167d282edb3e72..41dcf2a0637230 100644 --- a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll @@ -12,7 +12,7 @@ define <2 x double> @shuffle_binop_fol(ptr %ptr) { ; CHECK-NEXT: vector.body.preheader: ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[PTR:%.*]], align 8 ; CHECK-NEXT: [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[EXTRACTED2:%.*]] = shufflevector <4 x double> , <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[EXTRACTED2:%.*]] = shufflevector <4 x double> splat (double 1.000000e+00), <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[FADD3:%.*]] = fadd <2 x double> [[EXTRACTED1]], [[EXTRACTED2]] ; CHECK-NEXT: ret <2 x double> [[FADD3]] ; @@ -27,7 +27,7 @@ define <2 x double> @shuffle_binop_fol_oob(ptr %ptr) { ; CHECK-LABEL: @shuffle_binop_fol_oob( ; CHECK-NEXT: vector.body.preheader: ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[FADD:%.*]] = fadd <4 x double> [[WIDE_LOAD]], +; CHECK-NEXT: [[FADD:%.*]] = fadd <4 x double> [[WIDE_LOAD]], splat (double 1.000000e+00) ; CHECK-NEXT: [[EXTRACTED:%.*]] = shufflevector <4 x double> [[FADD]], <4 x double> undef, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[EXTRACTED]] ; diff --git a/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll b/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll index f38f948c2a2e50..e6a8af60f12872 100644 --- a/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll +++ b/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll @@ -217,7 +217,7 @@ define void @f4(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) { ; CHECK-NEXT: [[STORE_FORWARD_CAST]] = bitcast i32 [[A_P1]] to <2 x half> ; CHECK-NEXT: store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4 ; CHECK-NEXT: [[A:%.*]] = load <2 x half>, ptr [[AIDX]], align 4 -; CHECK-NEXT: [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], +; CHECK-NEXT: [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], splat (half 0xH4000) ; CHECK-NEXT: [[C_INT:%.*]] = bitcast <2 x half> [[C]] to i32 ; CHECK-NEXT: store i32 [[C_INT]], ptr [[CIDX]], align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]] diff --git a/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll index b5316f553c21b7..56b910eebea92f 100644 --- a/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll +++ b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll @@ -217,7 +217,7 @@ define void @f4(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) { ; CHECK-NEXT: [[STORE_FORWARD_CAST]] = bitcast i32 [[A_P1]] to <2 x half> ; CHECK-NEXT: store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4 ; CHECK-NEXT: [[A:%.*]] = load <2 x half>, ptr [[AIDX]], align 4 -; CHECK-NEXT: [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], +; CHECK-NEXT: [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], splat (half 0xH4000) ; CHECK-NEXT: [[C_INT:%.*]] = bitcast <2 x half> [[C]] to i32 ; CHECK-NEXT: store i32 [[C_INT]], ptr [[CIDX]], align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]] diff --git a/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll b/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll index 2bb6f05b91b1ab..3673b1b4d2241a 100644 --- a/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll +++ b/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll @@ -28,7 +28,7 @@ define void @unroll_upper(ptr noundef %pSrc, ptr nocapture noundef writeonly %pD ; CHECK-NEXT: [[NEXT_GEP37:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[AND]]) ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[NEXT_GEP37]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison) -; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[WIDE_MASKED_LOAD]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[WIDE_MASKED_LOAD]], splat (i16 8) ; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> ; CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP3]], ptr [[NEXT_GEP]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll index d0041668e77bde..456875ecb7d1fc 100644 --- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll +++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll @@ -32,22 +32,22 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ , [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> , [[VEC_IND12]] +; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12]] ; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], -; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> , [[VEC_IND_NEXT13]] +; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], splat (i32 16) +; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND_NEXT13]] ; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i32> [[TMP9]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <16 x i32> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]] ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], +; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], splat (i32 32) ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]] @@ -63,10 +63,10 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add ; CHECK: vector.body.epil.preheader: ; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]] ; CHECK: vector.body.epil: -; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> , [[VEC_IND12_UNR]] +; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]] ; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i32> [[TMP14]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <16 x i32> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]] ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP18]], align 1 ; CHECK-NEXT: br label [[MIDDLE_BLOCK]] diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll index c2a8675f7ebbab..cd4198f8160f73 100644 --- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll +++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll @@ -32,22 +32,22 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ , [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> , [[VEC_IND12]] +; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12]] ; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], -; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> , [[VEC_IND_NEXT13]] +; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], splat (i32 16) +; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND_NEXT13]] ; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i32> [[TMP9]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <16 x i32> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]] ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], +; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], splat (i32 32) ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]] @@ -63,10 +63,10 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add ; CHECK: vector.body.epil.preheader: ; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]] ; CHECK: vector.body.epil: -; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> , [[VEC_IND12_UNR]] +; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]] ; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i32> [[TMP14]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <16 x i32> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> , <16 x i8> +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49) ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]] ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP18]], align 1 ; CHECK-NEXT: br label [[MIDDLE_BLOCK]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll index cec9aa42d46935..ddf6c1005e0586 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll @@ -27,7 +27,7 @@ define void @test_blend_feeding_replicated_store_1(i64 %N, ptr noalias %src, ptr ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i1> zeroinitializer, <16 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP7]], [[TMP8]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP7]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP9]], i32 0 @@ -217,11 +217,11 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[GEP_SRC]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP4]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> splat (i8 1) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll index dcb8ba73661667..4f050877bd1316 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll @@ -21,8 +21,8 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) { ; CHECK-NEXT: [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[WIDE_LOAD]], <2 x i64> [[WIDE_LOAD1]], <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> , <2 x i64> [[TMP6]], <2 x i64> ) -; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> , <2 x i64> [[TMP7]], <2 x i64> ) +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1)) +; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1)) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 7f325ce1a1f04b..c6e58326158a37 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -89,8 +89,8 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) { ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8 ; DEFAULT-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]]) ; DEFAULT-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD1]]) -; DEFAULT-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP7]], -; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp ogt <2 x double> [[TMP8]], +; DEFAULT-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP7]], splat (double 1.000000e+00) +; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp ogt <2 x double> [[TMP8]], splat (double 1.000000e+00) ; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 ; DEFAULT-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DEFAULT: pred.store.if: @@ -390,7 +390,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; PRED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 99) ; PRED-NEXT: [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 ; PRED-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; PRED: pred.store.if: @@ -456,7 +456,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; PRED: pred.store.continue14: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104 ; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: @@ -744,7 +744,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], +; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], splat (i16 1) ; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double> ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0 ; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[TMP4]], align 8 @@ -869,7 +869,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] ; DEFAULT-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8 -; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6) ; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 ; DEFAULT-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DEFAULT: pred.store.if: @@ -942,7 +942,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE14]] ; DEFAULT: pred.store.continue14: -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; DEFAULT: middle.block: @@ -971,7 +971,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] ; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] ; PRED-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8 -; PRED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6) ; PRED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 ; PRED-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; PRED: pred.store.if: @@ -1044,7 +1044,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; PRED-NEXT: br label [[PRED_STORE_CONTINUE14]] ; PRED: pred.store.continue14: -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; PRED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; PRED: middle.block: @@ -1258,7 +1258,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE27]] ; DEFAULT: pred.store.continue27: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; DEFAULT: middle.block: @@ -1480,8 +1480,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED: pred.store.continue27: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]]) -; PRED-NEXT: [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; PRED-NEXT: [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0 ; PRED-NEXT: br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; PRED: middle.block: @@ -1562,8 +1562,8 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], -; DEFAULT-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20) +; DEFAULT-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) ; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; DEFAULT-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; DEFAULT-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -1594,7 +1594,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE6]] ; DEFAULT: pred.store.continue6: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; DEFAULT: middle.block: @@ -1625,8 +1625,8 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; PRED: vector.body: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; PRED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], -; PRED-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], +; PRED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20) +; PRED-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) ; PRED-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; PRED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; PRED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -1657,7 +1657,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; PRED: pred.store.continue6: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; PRED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; PRED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll index 0f33e8fa79ce7b..2ba460255c607d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll @@ -33,14 +33,14 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16> ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw <16 x i16> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP5]], splat (i16 8) ; CHECK-NEXT: [[TMP7:%.*]] = trunc nuw <16 x i16> [[TMP6]] to <16 x i8> ; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i16> ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw <16 x i16> [[TMP9]], [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], splat (i16 8) ; CHECK-NEXT: [[TMP12:%.*]] = trunc nuw <16 x i16> [[TMP11]] to <16 x i8> ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -66,14 +66,14 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD8]] to <8 x i16> ; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD7]] to <8 x i16> ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw <8 x i16> [[TMP16]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], +; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], splat (i16 8) ; CHECK-NEXT: [[TMP20:%.*]] = trunc nuw <8 x i16> [[TMP19]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[TMP20]], ptr [[TMP15]], align 1 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX6]] ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1 ; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD9]] to <8 x i16> ; CHECK-NEXT: [[TMP23:%.*]] = mul nuw <8 x i16> [[TMP22]], [[TMP17]] -; CHECK-NEXT: [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], +; CHECK-NEXT: [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], splat (i16 8) ; CHECK-NEXT: [[TMP25:%.*]] = trunc nuw <8 x i16> [[TMP24]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[TMP25]], ptr [[TMP21]], align 1 ; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX6]], 8 @@ -162,43 +162,43 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP5]], -; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i16> [[TMP4]] to <16 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16 -; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP12]], align 1 -; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0 -; CHECK-NEXT: [[TMP18:%.*]] = mul <8 x i16> [[TMP16]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], -; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i16> [[TMP19]] to <8 x i8> -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[A]] to i16 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i16> poison, i16 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i16> [[TMP14]], +; CHECK-NEXT: [[TMP16:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX7]] to i64 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]] -; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[INDEX7]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP18]] +; CHECK-NEXT: store <8 x i8> [[TMP17]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i32 [[INDEX7]], 8 -; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000 -; CHECK-NEXT: br i1 [[TMP24]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000 +; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -240,43 +240,43 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 % ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[C]], <16 x i16> [[TMP5]], <16 x i16> [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = mul <16 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], splat (i16 8) +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <16 x i16> [[TMP4]], <16 x i16> [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[TMP5]] to <16 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16 -; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP12]], align 1 -; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 -; CHECK-NEXT: [[TMP17:%.*]] = mul <8 x i16> [[TMP16]], -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i16> [[TMP17]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[C]], <8 x i16> [[TMP19]], <8 x i16> [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i16> [[TMP20]] to <8 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[A]] to i16 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i16> poison, i16 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = mul <8 x i16> [[TMP12]], +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[TMP13]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i16> [[TMP14]], splat (i16 8) +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[C]], <8 x i16> [[TMP15]], <8 x i16> [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i16> [[TMP16]] to <8 x i8> ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX3:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX3]] to i64 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]] -; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[INDEX3]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP18]] +; CHECK-NEXT: store <8 x i8> [[TMP17]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i32 [[INDEX3]], 8 -; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000 -; CHECK-NEXT: br i1 [[TMP24]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000 +; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -464,8 +464,8 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP1]], align 4 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP2]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP1]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 7749bb9edb124e..7c6a881ead1087 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -124,7 +124,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 @@ -132,7 +132,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -159,7 +159,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <2 x i64> [[VEC_IND6]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <2 x i64> [[VEC_IND6]], +; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <2 x i64> [[VEC_IND6]], splat (i64 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -213,7 +213,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP1]] @@ -222,7 +222,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP3]], align 4 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -252,7 +252,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store <2 x i64> [[VEC_IND11]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <2 x i64> [[VEC_IND11]], +; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <2 x i64> [[VEC_IND11]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -301,17 +301,17 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10) +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP4]], align 4 ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IND_END4]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -335,11 +335,11 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX7]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[VEC_IND8]], +; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[VEC_IND8]], splat (i64 10) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <2 x i64> [[VEC_IND8]], +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <2 x i64> [[VEC_IND8]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[IND_END]] ; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -393,7 +393,7 @@ define void @test_widen_extended_induction(ptr %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i64 @@ -403,7 +403,7 @@ define void @test_widen_extended_induction(ptr %dst) { ; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP3]], align 1 ; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP4]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -427,7 +427,7 @@ define void @test_widen_extended_induction(ptr %dst) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <2 x i8> [[VEC_IND3]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX2]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], splat (i8 2) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT6]], 10000 ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -475,7 +475,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 @@ -483,7 +483,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP2]], align 1 ; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: @@ -506,7 +506,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store <2 x i8> [[VEC_IND3]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX2]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], splat (i8 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 10000 ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll index e9c9288e734394..86a9af6fd5a3cd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll @@ -16,7 +16,7 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -65,8 +65,8 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) { ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP26]], ptr [[TMP29]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX_NEXT]], i64 1002) -; CHECK-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP30]], i32 0 ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index eafe6921b37ca7..547e7afbefcafe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -14,8 +14,8 @@ define double @test_reduction_costs() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> ) -; CHECK-NEXT: [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> ) +; CHECK-NEXT: [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> splat (double 3.000000e+00)) +; CHECK-NEXT: [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> splat (double 9.000000e+00)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -214,8 +214,8 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE74:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP47]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP47]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP47]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -240,8 +240,8 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] ; CHECK: [[PRED_STORE_CONTINUE47]]: -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP2]], -; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 @@ -274,8 +274,8 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = or <2 x i1> [[TMP47]], [[TMP20]] ; CHECK-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP47]], [[TMP21]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x i64> zeroinitializer, <2 x i64> -; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) +; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP22]], i32 0 ; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]] ; CHECK: [[PRED_STORE_IF59]]: @@ -304,8 +304,8 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE66]] ; CHECK: [[PRED_STORE_CONTINUE66]]: -; CHECK-NEXT: [[TMP32:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], -; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], +; CHECK-NEXT: [[TMP32:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) +; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) ; CHECK-NEXT: [[TMP34:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[TMP32]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP36:%.*]] = or <2 x i1> [[TMP22]], [[TMP34]] @@ -313,29 +313,29 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0 ; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF67:.*]], label %[[PRED_STORE_CONTINUE68:.*]] ; CHECK: [[PRED_STORE_IF67]]: -; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] -; CHECK-NEXT: store i64 [[TMP39]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] +; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] +; CHECK-NEXT: store i64 [[TMP45]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE68]] ; CHECK: [[PRED_STORE_CONTINUE68]]: ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1 ; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF69:.*]], label %[[PRED_STORE_CONTINUE70:.*]] ; CHECK: [[PRED_STORE_IF69]]: -; CHECK-NEXT: [[TMP41:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP41]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP39]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE70]] ; CHECK: [[PRED_STORE_CONTINUE70]]: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0 ; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF71:.*]], label %[[PRED_STORE_CONTINUE72:.*]] ; CHECK: [[PRED_STORE_IF71]]: -; CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP43]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: [[TMP41:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP41]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE72]] ; CHECK: [[PRED_STORE_CONTINUE72]]: ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1 ; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF73:.*]], label %[[PRED_STORE_CONTINUE74]] ; CHECK: [[PRED_STORE_IF73]]: -; CHECK-NEXT: [[TMP45:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP45]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: [[TMP43:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] +; CHECK-NEXT: store i64 [[TMP43]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE74]] ; CHECK: [[PRED_STORE_CONTINUE74]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 553989544c7787..2f756ab4b0e1ab 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -355,8 +355,8 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: pred.store.continue2: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; PRED-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; PRED-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP24]], i32 0 ; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: middle.block: @@ -549,8 +549,8 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: pred.store.continue8: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]]) -; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; PRED-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP35]], i32 0 ; PRED-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: middle.block: @@ -746,8 +746,8 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: pred.store.continue7: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) +; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP34]], i32 0 ; PRED-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; PRED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index cd7662a657dfe6..8f40c31382a860 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -30,8 +30,8 @@ define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i32 4 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP8]], align 8 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[VEC_PHI1]], +; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI]], splat (i32 -1) +; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[VEC_PHI1]], splat (i32 -1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -225,7 +225,7 @@ define void @wide_truncated_iv(ptr %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0 @@ -233,7 +233,7 @@ define void @wide_truncated_iv(ptr %dst) { ; CHECK-NEXT: store <8 x i8> [[VEC_IND]], ptr [[TMP4]], align 1 ; CHECK-NEXT: store <8 x i8> [[STEP_ADD]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[STEP_ADD]], splat (i8 8) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 192 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -269,153 +269,69 @@ exit: } define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) { -; DEFAULT-LABEL: define i64 @test_ptr_ivs_and_widened_ivs( -; DEFAULT-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) { -; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; DEFAULT-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; DEFAULT-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 -; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 -; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] -; DEFAULT-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 -; DEFAULT-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32 -; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] -; DEFAULT: vector.body: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], -; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; DEFAULT-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 -; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]] -; DEFAULT-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 -; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 -; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; DEFAULT-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 -; DEFAULT-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[WIDE_LOAD]], -; DEFAULT-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[WIDE_LOAD7]], -; DEFAULT-NEXT: [[TMP10:%.*]] = zext <4 x i32> [[TMP8]] to <4 x i64> -; DEFAULT-NEXT: [[TMP11:%.*]] = zext <4 x i32> [[TMP9]] to <4 x i64> -; DEFAULT-NEXT: [[TMP12:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> -; DEFAULT-NEXT: [[TMP13:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64> -; DEFAULT-NEXT: [[TMP14:%.*]] = shl <4 x i64> [[TMP10]], [[TMP12]] -; DEFAULT-NEXT: [[TMP15]] = shl <4 x i64> [[TMP11]], [[TMP13]] -; DEFAULT-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP14]], <4 x i32> -; DEFAULT-NEXT: [[TMP17:%.*]] = shufflevector <4 x i64> [[TMP14]], <4 x i64> [[TMP15]], <4 x i32> -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], -; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; DEFAULT: middle.block: -; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP15]], i32 2 -; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP15]], i32 3 -; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; DEFAULT-NEXT: br label [[LOOP:%.*]] -; DEFAULT: loop: -; DEFAULT-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[SHL:%.*]], [[LOOP]] ] -; DEFAULT-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] -; DEFAULT-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] -; DEFAULT-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] -; DEFAULT-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4 -; DEFAULT-NEXT: [[NOT:%.*]] = xor i32 [[L]], 1 -; DEFAULT-NEXT: [[NOT_EXT:%.*]] = zext i32 [[NOT]] to i64 -; DEFAULT-NEXT: [[IV_EXT:%.*]] = zext i32 [[IV_1]] to i64 -; DEFAULT-NEXT: [[SHL]] = shl i64 [[NOT_EXT]], [[IV_EXT]] -; DEFAULT-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 -; DEFAULT-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 -; DEFAULT-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 -; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_2_NEXT]], [[N]] -; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] -; DEFAULT: exit: -; DEFAULT-NEXT: [[P_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] -; DEFAULT-NEXT: ret i64 [[P_LCSSA]] -; -; PRED-LABEL: define i64 @test_ptr_ivs_and_widened_ivs( -; PRED-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) { -; PRED-NEXT: entry: -; PRED-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; PRED-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; PRED-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; PRED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 -; PRED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; PRED: vector.ph: -; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 -; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 -; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] -; PRED-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 -; PRED-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32 -; PRED-NEXT: br label [[VECTOR_BODY:%.*]] -; PRED: vector.body: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; PRED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], -; PRED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; PRED-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; PRED-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 -; PRED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]] -; PRED-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]] -; PRED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 -; PRED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 -; PRED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; PRED-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 -; PRED-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[WIDE_LOAD]], -; PRED-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[WIDE_LOAD7]], -; PRED-NEXT: [[TMP10:%.*]] = zext <4 x i32> [[TMP8]] to <4 x i64> -; PRED-NEXT: [[TMP11:%.*]] = zext <4 x i32> [[TMP9]] to <4 x i64> -; PRED-NEXT: [[TMP12:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> -; PRED-NEXT: [[TMP13:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64> -; PRED-NEXT: [[TMP14:%.*]] = shl <4 x i64> [[TMP10]], [[TMP12]] -; PRED-NEXT: [[TMP15]] = shl <4 x i64> [[TMP11]], [[TMP13]] -; PRED-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP14]], <4 x i32> -; PRED-NEXT: [[TMP17:%.*]] = shufflevector <4 x i64> [[TMP14]], <4 x i64> [[TMP15]], <4 x i32> -; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], -; PRED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; PRED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; PRED: middle.block: -; PRED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; PRED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP15]], i32 3 -; PRED-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP15]], i32 2 -; PRED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; PRED: scalar.ph: -; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ] -; PRED-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; PRED-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; PRED-NEXT: br label [[LOOP:%.*]] -; PRED: loop: -; PRED-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[SHL:%.*]], [[LOOP]] ] -; PRED-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] -; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] -; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] -; PRED-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4 -; PRED-NEXT: [[NOT:%.*]] = xor i32 [[L]], 1 -; PRED-NEXT: [[NOT_EXT:%.*]] = zext i32 [[NOT]] to i64 -; PRED-NEXT: [[IV_EXT:%.*]] = zext i32 [[IV_1]] to i64 -; PRED-NEXT: [[SHL]] = shl i64 [[NOT_EXT]], [[IV_EXT]] -; PRED-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 -; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 -; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 -; PRED-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_2_NEXT]], [[N]] -; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] -; PRED: exit: -; PRED-NEXT: [[P_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] -; PRED-NEXT: ret i64 [[P_LCSSA]] +; CHECK-LABEL: define i64 @test_ptr_ivs_and_widened_ivs( +; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i32> [[TMP7]] to <4 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64> +; CHECK-NEXT: [[TMP10]] = shl <4 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[P:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[SHL:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4 +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[L]], 1 +; CHECK-NEXT: [[NOT_EXT:%.*]] = zext i32 [[NOT]] to i64 +; CHECK-NEXT: [[IV_EXT:%.*]] = zext i32 [[IV_1]] to i64 +; CHECK-NEXT: [[SHL]] = shl i64 [[NOT_EXT]], [[IV_EXT]] +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 +; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 +; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_2_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[P_LCSSA:%.*]] = phi i64 [ [[P]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[P_LCSSA]] ; entry: br label %loop @@ -441,72 +357,56 @@ exit: } define void @zext_iv_increment(ptr %dst, i64 %N) { -; DEFAULT-LABEL: define void @zext_iv_increment( -; DEFAULT-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { -; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2 -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; DEFAULT: vector.scevcheck: -; DEFAULT-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) -; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 -; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; DEFAULT-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32 -; DEFAULT-NEXT: [[TMP3:%.*]] = add i32 1, [[TMP2]] -; DEFAULT-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1 -; DEFAULT-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; DEFAULT-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] -; DEFAULT-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] -; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2 -; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] -; DEFAULT-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 -; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] -; DEFAULT: vector.body: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 -; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP7]], i32 2 -; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2 -; DEFAULT-NEXT: store i32 0, ptr [[TMP9]], align 8 -; DEFAULT-NEXT: store i32 0, ptr [[TMP10]], align 8 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; DEFAULT: middle.block: -; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP173_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: br label [[FOR_BODY174:%.*]] -; DEFAULT: loop: -; DEFAULT-NEXT: [[I167_0800:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[INC179:%.*]], [[FOR_BODY174]] ] -; DEFAULT-NEXT: [[CONV169801:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[CONV169:%.*]], [[FOR_BODY174]] ] -; DEFAULT-NEXT: [[PATCH_INDEX:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[CONV169801]], i32 2 -; DEFAULT-NEXT: store i32 0, ptr [[PATCH_INDEX]], align 8 -; DEFAULT-NEXT: [[INC179]] = add i32 [[I167_0800]], 1 -; DEFAULT-NEXT: [[CONV169]] = zext i32 [[INC179]] to i64 -; DEFAULT-NEXT: [[CMP172:%.*]] = icmp ult i64 [[CONV169]], [[N]] -; DEFAULT-NEXT: br i1 [[CMP172]], label [[FOR_BODY174]], label [[FOR_COND_CLEANUP173_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]] -; DEFAULT: exit: -; DEFAULT-NEXT: ret void -; -; PRED-LABEL: define void @zext_iv_increment( -; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { -; PRED-NEXT: entry: -; PRED-NEXT: br label [[FOR_BODY174:%.*]] -; PRED: loop: -; PRED-NEXT: [[I167_0800:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC179:%.*]], [[FOR_BODY174]] ] -; PRED-NEXT: [[CONV169801:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[CONV169:%.*]], [[FOR_BODY174]] ] -; PRED-NEXT: [[PATCH_INDEX:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[CONV169801]], i32 2 -; PRED-NEXT: store i32 0, ptr [[PATCH_INDEX]], align 8 -; PRED-NEXT: [[INC179]] = add i32 [[I167_0800]], 1 -; PRED-NEXT: [[CONV169]] = zext i32 [[INC179]] to i64 -; PRED-NEXT: [[CMP172:%.*]] = icmp ult i64 [[CONV169]], [[N]] -; PRED-NEXT: br i1 [[CMP172]], label [[FOR_BODY174]], label [[FOR_COND_CLEANUP173_LOOPEXIT:%.*]] -; PRED: exit: -; PRED-NEXT: ret void +; CHECK-LABEL: define void @zext_iv_increment( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 1, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2 +; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 8 +; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_NEXT_EXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PATCH_INDEX:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[IV_WIDE]], i32 2 +; CHECK-NEXT: store i32 0, ptr [[PATCH_INDEX]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT_EXT]] = zext i32 [[IV_NEXT]] to i64 +; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV_NEXT_EXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -536,4 +436,8 @@ exit: ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index 1a4ed0f21bf4c5..292d8e2077d202 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -27,7 +27,7 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 2) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1 @@ -47,21 +47,21 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX4]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = add <8 x i8> [[WIDE_LOAD6]], +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = add <8 x i8> [[WIDE_LOAD5]], splat (i8 2) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store <8 x i8> [[TMP11]], ptr [[TMP13]], align 1 -; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -130,7 +130,7 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], splat (i8 2) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1 @@ -150,21 +150,21 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX4]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <8 x i8> [[WIDE_LOAD6]], +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <8 x i8> [[WIDE_LOAD5]], splat (i8 2) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store <8 x i8> [[TMP11]], ptr [[TMP13]], align 1 -; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -224,7 +224,7 @@ define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[WIDE_LOAD]], splat (i16 2) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2 @@ -301,7 +301,7 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i16> [[TMP4]], splat (i16 2) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2 @@ -321,22 +321,22 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX4]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP11]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[WIDE_LOAD6]] to <8 x i16> -; CHECK-NEXT: [[TMP13:%.*]] = add <8 x i16> [[TMP12]], +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i8>, ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[WIDE_LOAD5]] to <8 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = add <8 x i16> [[TMP12]], splat (i16 2) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP14]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr [[TMP15]], align 2 -; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 8 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -401,7 +401,7 @@ define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP7]], align 4 @@ -483,12 +483,12 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i8> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP8]], +; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 4) +; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP6]], splat (i8 32) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[WIDE_LOAD]], splat (i8 51) +; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i8> [[TMP8]], splat (i8 60) ; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i8> [[TMP7]], [[TMP1]] -; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP9]], +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP9]], splat (i8 -4) ; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i8> [[TMP11]], [[TMP2]] ; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i8> [[TMP12]], [[TMP10]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]] @@ -508,36 +508,36 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 8 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT7]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT8]] to <8 x i8> -; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT9]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT10]] to <8 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT6]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT7]] to <8 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT9]] to <8 x i8> ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX11]], 0 +; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX10]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i8> [[WIDE_LOAD12]], -; CHECK-NEXT: [[TMP23:%.*]] = add <8 x i8> [[TMP22]], -; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i8> [[WIDE_LOAD12]], -; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i8> [[TMP24]], +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i8> [[WIDE_LOAD11]], splat (i8 4) +; CHECK-NEXT: [[TMP23:%.*]] = add <8 x i8> [[TMP22]], splat (i8 32) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i8> [[WIDE_LOAD11]], splat (i8 51) +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i8> [[TMP24]], splat (i8 60) ; CHECK-NEXT: [[TMP26:%.*]] = and <8 x i8> [[TMP23]], [[TMP17]] -; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i8> [[TMP25]], +; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i8> [[TMP25]], splat (i8 -4) ; CHECK-NEXT: [[TMP28:%.*]] = xor <8 x i8> [[TMP27]], [[TMP18]] ; CHECK-NEXT: [[TMP29:%.*]] = mul <8 x i8> [[TMP28]], [[TMP26]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i32 0 ; CHECK-NEXT: store <8 x i8> [[TMP29]], ptr [[TMP31]], align 1 -; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX11]], 8 -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC5]] +; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 8 +; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP32]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -632,13 +632,13 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[WIDE_LOAD]] to <16 x i8> -; CHECK-NEXT: [[TMP7:%.*]] = shl <16 x i8> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i8> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP6]], -; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], -; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i8> [[TMP10]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 4) +; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i8> [[TMP7]], splat (i8 32) +; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP6]], splat (i8 -52) +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], splat (i8 51) +; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i8> [[TMP10]], splat (i8 60) ; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i8> [[TMP8]], [[TMP1]] -; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i8> [[TMP11]], +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i8> [[TMP11]], splat (i8 -4) ; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i8> [[TMP13]], [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = mul <16 x i8> [[TMP14]], [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]] @@ -658,38 +658,38 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 8 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT7]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT8]] to <8 x i8> -; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT9]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT10]] to <8 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT6]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT7]] to <8 x i8> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT9]] to <8 x i8> ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX11]], 0 +; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX10]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP22]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i16>, ptr [[TMP23]], align 2 -; CHECK-NEXT: [[TMP24:%.*]] = trunc <8 x i16> [[WIDE_LOAD12]] to <8 x i8> -; CHECK-NEXT: [[TMP25:%.*]] = shl <8 x i8> [[TMP24]], -; CHECK-NEXT: [[TMP26:%.*]] = add <8 x i8> [[TMP25]], -; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i8> [[TMP24]], -; CHECK-NEXT: [[TMP28:%.*]] = or <8 x i8> [[TMP27]], -; CHECK-NEXT: [[TMP29:%.*]] = mul <8 x i8> [[TMP28]], +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i16>, ptr [[TMP23]], align 2 +; CHECK-NEXT: [[TMP24:%.*]] = trunc <8 x i16> [[WIDE_LOAD11]] to <8 x i8> +; CHECK-NEXT: [[TMP25:%.*]] = shl <8 x i8> [[TMP24]], splat (i8 4) +; CHECK-NEXT: [[TMP26:%.*]] = add <8 x i8> [[TMP25]], splat (i8 32) +; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i8> [[TMP24]], splat (i8 -52) +; CHECK-NEXT: [[TMP28:%.*]] = or <8 x i8> [[TMP27]], splat (i8 51) +; CHECK-NEXT: [[TMP29:%.*]] = mul <8 x i8> [[TMP28]], splat (i8 60) ; CHECK-NEXT: [[TMP30:%.*]] = and <8 x i8> [[TMP26]], [[TMP19]] -; CHECK-NEXT: [[TMP31:%.*]] = and <8 x i8> [[TMP29]], +; CHECK-NEXT: [[TMP31:%.*]] = and <8 x i8> [[TMP29]], splat (i8 -4) ; CHECK-NEXT: [[TMP32:%.*]] = xor <8 x i8> [[TMP31]], [[TMP20]] ; CHECK-NEXT: [[TMP33:%.*]] = mul <8 x i8> [[TMP32]], [[TMP30]] ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i32 0 ; CHECK-NEXT: store <8 x i8> [[TMP33]], ptr [[TMP35]], align 1 -; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX11]], 8 -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC5]] +; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 8 +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -776,14 +776,14 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8> -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <16 x i32> [[TMP4]], splat (i32 2) +; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] @@ -798,10 +798,10 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[A_PHI:%.*]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]] @@ -859,14 +859,14 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP8]] to <16 x i8> -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 -; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <16 x i32> [[TMP6]], splat (i32 2) +; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 +; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP10]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 @@ -877,15 +877,15 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[A_PHI:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[A_PHI_LCSSA]] to i8 ; CHECK-NEXT: ret i8 [[RET]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SCALAR_RECUR]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[A_PHI]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV]] = zext i8 [[TMP13]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV]] = zext i8 [[TMP12]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index 2b6933654ac1ae..29795bc9f29821 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -26,7 +26,7 @@ ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] @@ -34,15 +34,15 @@ ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], -; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true)) +; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1) +; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8) ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body @@ -92,22 +92,22 @@ for.end10: ; preds = %for.inc8 ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] ; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, <2 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[VecInd]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> ) +; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[VecInd]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> splat (i1 true)) ; CHECK: %[[StoreVal:.*]] = add nsw <2 x i64> %[[VecInd]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> -; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], -; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], +; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> splat (i1 true)) +; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], splat (i64 1) +; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], splat (i64 8) ; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 2 -; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], splat (i64 2) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body ; Function Attrs: norecurse nounwind uwtable diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll index a8caae40aed161..4c31cfc14afb30 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll @@ -47,7 +47,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; SC_SVE-NEXT: [[TMP16:%.*]] = shl <4 x i32> [[TMP15]], [[BROADCAST_SPLAT]] ; SC_SVE-NEXT: [[TMP17]] = add <4 x i32> [[TMP16]], [[VEC_PHI]] ; SC_SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SC_SVE: middle.block: @@ -122,7 +122,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran ; NO_SC_SVE-NEXT: [[TMP16:%.*]] = shl <8 x i32> [[TMP15]], [[BROADCAST_SPLAT]] ; NO_SC_SVE-NEXT: [[TMP17]] = add <8 x i32> [[TMP16]], [[VEC_PHI]] ; NO_SC_SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; NO_SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; NO_SC_SVE-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; NO_SC_SVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO_SC_SVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO_SC_SVE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll index c9db83b935fb8e..fb5d513dfbd75e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll @@ -115,9 +115,9 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-UNORDERED-LABEL: @fadd_strict_unroll ; CHECK-UNORDERED: vector.body ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_LOAD3:.*]] = load <8 x float>, ptr @@ -195,9 +195,9 @@ define float @fadd_strict_unroll_last_val(ptr noalias nocapture readonly %a, ptr ; CHECK-UNORDERED-LABEL: @fadd_strict_unroll_last_val ; CHECK-UNORDERED: vector.body ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ] -; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_LOAD3:.*]] = load <8 x float>, ptr @@ -273,8 +273,8 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED: %[[LOADA1:.*]] = load float, ptr %a ; CHECK-UNORDERED: %[[LOADA2:.*]] = load float, ptr %[[ARRAYIDX]] ; CHECK-UNORDERED: vector.ph -; CHECK-UNORDERED: %[[INS2:.*]] = insertelement <4 x float> , float %[[LOADA2]], i32 0 -; CHECK-UNORDERED: %[[INS1:.*]] = insertelement <4 x float> , float %[[LOADA1]], i32 0 +; CHECK-UNORDERED: %[[INS2:.*]] = insertelement <4 x float> splat (float -0.000000e+00), float %[[LOADA2]], i32 0 +; CHECK-UNORDERED: %[[INS1:.*]] = insertelement <4 x float> splat (float -0.000000e+00), float %[[LOADA1]], i32 0 ; CHECK-UNORDERED: vector.body ; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <4 x float> [ %[[INS2]], %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <4 x float> [ %[[INS1]], %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ] @@ -401,8 +401,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED: br i1 %[[EXTRACT]], label %pred.load.if, label %pred.load.continue ; CHECK-ORDERED: pred.load.continue6 ; CHECK-ORDERED: %[[PHI1:.*]] = phi <4 x float> [ %[[PHI0:.*]], %pred.load.continue4 ], [ %[[INS_ELT:.*]], %pred.load.if5 ] -; CHECK-ORDERED: %[[XOR:.*]] = xor <4 x i1> %[[FCMP1]], -; CHECK-ORDERED: %[[PRED:.*]] = select <4 x i1> %[[XOR]], <4 x float> , <4 x float> %[[PHI1]] +; CHECK-ORDERED: %[[XOR:.*]] = xor <4 x i1> %[[FCMP1]], splat (i1 true) +; CHECK-ORDERED: %[[PRED:.*]] = select <4 x i1> %[[XOR]], <4 x float> splat (float 3.000000e+00), <4 x float> %[[PHI1]] ; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[PHI]], <4 x float> %[[PRED]]) ; CHECK-ORDERED: for.body ; CHECK-ORDERED: %[[RES_PHI:.*]] = phi float [ %[[MERGE_RDX:.*]], %scalar.ph ], [ %[[FADD:.*]], %for.inc ] @@ -427,8 +427,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED: %[[EXTRACT:.*]] = extractelement <4 x i1> %[[FCMP1]], i32 0 ; CHECK-UNORDERED: br i1 %[[EXTRACT]], label %pred.load.if, label %pred.load.continue ; CHECK-UNORDERED: pred.load.continue6 -; CHECK-UNORDERED: %[[XOR:.*]] = xor <4 x i1> %[[FCMP1]], -; CHECK-UNORDERED: %[[PRED:.*]] = select <4 x i1> %[[XOR]], <4 x float> , <4 x float> %[[PRED_PHI:.*]] +; CHECK-UNORDERED: %[[XOR:.*]] = xor <4 x i1> %[[FCMP1]], splat (i1 true) +; CHECK-UNORDERED: %[[PRED:.*]] = select <4 x i1> %[[XOR]], <4 x float> splat (float 3.000000e+00), <4 x float> %[[PRED_PHI:.*]] ; CHECK-UNORDERED: %[[VEC_FADD]] = fadd <4 x float> %[[PHI]], %[[PRED]] ; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd ; CHECK-UNORDERED: middle.block @@ -489,7 +489,7 @@ define float @fadd_predicated(ptr noalias nocapture %a, i64 %n) { ; CHECK-ORDERED: %[[RDX_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %pred.load.continue2 ] ; CHECK-ORDERED: pred.load.continue2 ; CHECK-ORDERED: %[[PHI:.*]] = phi <2 x float> [ %[[PHI0:.*]], %pred.load.continue ], [ %[[INS_ELT:.*]], %pred.load.if1 ] -; CHECK-ORDERED: %[[MASK:.*]] = select <2 x i1> %0, <2 x float> %[[PHI]], <2 x float> +; CHECK-ORDERED: %[[MASK:.*]] = select <2 x i1> %0, <2 x float> %[[PHI]], <2 x float> splat (float -0.000000e+00) ; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.v2f32(float %[[RDX_PHI]], <2 x float> %[[MASK]]) ; CHECK-ORDERED: for.end: ; CHECK-ORDERED: %[[RES_PHI:.*]] = phi float [ %[[FADD:.*]], %for.body ], [ %[[RDX]], %middle.block ] @@ -544,7 +544,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, ; CHECK-UNORDERED-LABEL: @fadd_multiple ; CHECK-UNORDERED: vector.body -; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] +; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <8 x float> %[[PHI]], %[[VEC_LOAD1]] ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, ptr @@ -593,7 +593,7 @@ define float @fadd_multiple_one_flag(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-UNORDERED-LABEL: @fadd_multiple_one_flag ; CHECK-UNORDERED: vector.body -; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ , %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] +; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ splat (float -0.000000e+00), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, ptr ; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <8 x float> %[[PHI]], %[[VEC_LOAD1]] ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, ptr @@ -693,7 +693,7 @@ define float @fast_induction_and_reduction(ptr nocapture readonly %values, float ; CHECK-ORDERED: %[[IND_PHI:.*]] = phi <4 x float> [ %[[INDUCTION]], %vector.ph ], [ %[[VEC_IND_NEXT:.*]], %vector.body ] ; CHECK-ORDERED: %[[LOAD1:.*]] = load <4 x float>, ptr ; CHECK-ORDERED: %[[FADD1:.*]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[RDX_PHI]], <4 x float> %[[LOAD1]]) -; CHECK-ORDERED: %[[VEC_IND_NEXT]] = fadd fast <4 x float> %[[IND_PHI]], +; CHECK-ORDERED: %[[VEC_IND_NEXT]] = fadd fast <4 x float> %[[IND_PHI]], splat (float 8.000000e+00) ; CHECK-ORDERED: for.body ; CHECK-ORDERED: %[[RDX_SUM_PHI:.*]] = phi float [ {{.*}}, %scalar.ph ], [ %[[FADD2:.*]], %for.body ] ; CHECK-ORDERED: %[[IND_SUM_PHI:.*]] = phi fast float [ {{.*}}, %scalar.ph ], [ %[[ADD_IND:.*]], %for.body ] @@ -713,7 +713,7 @@ define float @fast_induction_and_reduction(ptr nocapture readonly %values, float ; CHECK-UNORDERED: %[[IND_PHI:.*]] = phi <4 x float> [ %[[INDUCTION]], %vector.ph ], [ %[[VEC_IND_NEXT:.*]], %vector.body ] ; CHECK-UNORDERED: %[[LOAD1:.*]] = load <4 x float>, ptr ; CHECK-UNORDERED: %[[VEC_FADD]] = fadd <4 x float> %[[RDX_PHI]], %[[LOAD1]] -; CHECK-UNORDERED: %[[VEC_IND_NEXT]] = fadd fast <4 x float> %[[IND_PHI]], +; CHECK-UNORDERED: %[[VEC_IND_NEXT]] = fadd fast <4 x float> %[[IND_PHI]], splat (float 8.000000e+00) ; CHECK-UNORDERED: middle.block: ; CHECK-UNORDERED: %[[VEC_RDX:.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %[[VEC_FADD]]) ; CHECK-UNORDERED: for.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 87326d3a4a7926..c38c4b08cb5b57 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -138,7 +138,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]] ; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0 -; CHECK-VF8-NEXT: store <8 x i8> , ptr [[TMP21]], align 1 +; CHECK-VF8-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP21]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -232,7 +232,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0 -; CHECK-NEXT: store <8 x i64> , ptr [[TMP21]], align 1 +; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -295,7 +295,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]] ; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0 -; CHECK-VF8-NEXT: store <8 x i64> , ptr [[TMP21]], align 1 +; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll index 375bbb74d3d48c..cf1dd467647fec 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll @@ -83,7 +83,7 @@ for.end: define dso_local void @loop_fixed_width_i128(ptr nocapture %ptr, i64 %N) { ; CHECK-LABEL: @loop_fixed_width_i128 ; CHECK: load <4 x i128>, ptr -; CHECK: add nsw <4 x i128> {{.*}}, +; CHECK: add nsw <4 x i128> {{.*}}, splat (i128 42) ; CHECK: store <4 x i128> {{.*}} ptr ; CHECK-NOT: vscale entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 1d150141e6251e..a33234070867aa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -95,7 +95,7 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX1]], i64 [[TMP2]]) -; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index 994f2f5e377632..af1c63bc405a38 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -338,7 +338,7 @@ define void @test_v2_v4m(ptr noalias %a, ptr readonly %b) #3 { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_mask(<4 x i64> [[WIDE_LOAD]], <4 x i1> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @foo_vector_fixed4_mask(<4 x i64> [[WIDE_LOAD]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP5]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll index 187f50f2e76a48..ed5467258c71fc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll @@ -27,7 +27,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP7]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 4 ; CHECK-NEXT: [[NEXT_ACTIVE_LANE_MASK]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[IDX]], i64 [[N2]]) -; CHECK-NEXT: [[NOT_ACTIVE_LANE_MASK:%.*]] = xor <4 x i1> [[NEXT_ACTIVE_LANE_MASK]], +; CHECK-NEXT: [[NOT_ACTIVE_LANE_MASK:%.*]] = xor <4 x i1> [[NEXT_ACTIVE_LANE_MASK]], splat (i1 true) ; CHECK-NEXT: [[FIRST_LANE_SET:%.*]] = extractelement <4 x i1> [[NOT_ACTIVE_LANE_MASK]], i32 0 ; CHECK-NEXT: br i1 [[FIRST_LANE_SET]], label %middle.block, label %vector.body @@ -63,7 +63,7 @@ define void @cond_uniform_load(ptr nocapture %dst, ptr nocapture readonly %src, ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IDX]], 0 ; CHECK: [[COND_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{%.*}}, i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[COND_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[MASK:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer ; CHECK-NEXT: call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[SRC_SPLAT]], i32 4, <4 x i1> [[MASK]], <4 x i32> poison) entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll index f53fd5bc8d15a0..c017c1f3b36ee8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -20,7 +20,7 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[WIDE_LOAD]], splat (i16 10) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll index 57807604b37a87..1dd49ecf85b81b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -49,8 +49,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N) ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison) ; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison) -; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], -; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], splat (double 1.000000e+00) +; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], splat (double 1.000000e+00) ; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP10]], ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]]) ; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP11]], ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index c0933a9445f94d..fda9ef2cf6c2f3 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -20,7 +20,7 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) @@ -84,7 +84,7 @@ define void @test_stride-1_4i32(ptr readonly %data, ptr noalias nocapture %dst, ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> , [[REVERSE]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> splat (i32 5), [[REVERSE]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4 @@ -150,7 +150,7 @@ define void @test_stride2_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[STRIDED_VEC]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> splat (i32 5), [[STRIDED_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4 @@ -209,16 +209,16 @@ define void @test_stride3_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], splat (i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP2]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -273,16 +273,16 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP2]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -343,7 +343,7 @@ define void @test_stride_loopinvar_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]]) @@ -405,16 +405,16 @@ define void @test_stride_noninvar_4i32(ptr readonly %data, ptr noalias nocapture ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND2]] -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <4 x i32> [[TMP2]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP3]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], splat (i32 32) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -515,7 +515,7 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 3), [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[X]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -526,15 +526,15 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND4]] -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP5]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP6]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP6]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll index fec5921720fed9..fde594d0483613 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll @@ -29,9 +29,9 @@ define void @arm_abs_q7(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 % ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 -128) ; CHECK-NEXT: [[TMP3:%.*]] = sub <16 x i8> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> , <16 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> splat (i8 127), <16 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[WIDE_LOAD]], <16 x i8> [[TMP4]] ; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[NEXT_GEP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 @@ -124,9 +124,9 @@ define void @arm_abs_q15(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX7]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], splat (i16 -32768) ; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i16> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> , <8 x i16> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> splat (i16 32767), <8 x i16> [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[WIDE_LOAD]], <8 x i16> [[TMP6]] ; CHECK-NEXT: store <8 x i16> [[TMP7]], ptr [[NEXT_GEP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -219,9 +219,9 @@ define void @arm_abs_q31(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX7]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 -2147483648) ; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> , <4 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> splat (i32 2147483647), <4 x i32> [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP6]] ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[NEXT_GEP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll index 1371d80b00cadd..6011ccfd156148 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -72,7 +72,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD1]]) ; CHECK-NEXT: [[TMP8]] = add i32 [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -115,7 +115,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) @@ -167,14 +167,14 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP4]] = and <4 x i32> [[VEC_PHI]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 @@ -427,7 +427,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) @@ -480,7 +480,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index b5ae9700217eae..832d4db53036fb 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -251,7 +251,7 @@ define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] @@ -318,7 +318,7 @@ define i32 @and_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] @@ -586,7 +586,7 @@ define float @fmul_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i32 [[TMP0]] @@ -653,7 +653,7 @@ define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 2147483647), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -722,7 +722,7 @@ define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -791,7 +791,7 @@ define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll index b0cccf4d0a7bfa..60c3b52f2003a6 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll @@ -45,7 +45,7 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 % ; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD7]] ; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> , <4 x float> [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> splat (float -0.000000e+00), <4 x float> [[TMP9]] ; CHECK-NEXT: [[PREDPHI]] = fadd reassoc arcp contract afn <4 x float> [[VEC_PHI]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index 3432773b4e1b3d..d49639db3bbfcd 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -111,7 +111,7 @@ define hidden void @pointer_phi_v4i32_add3(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -544,7 +544,7 @@ define hidden void @pointer_phi_v4f32_add3(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x float> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x float> poison) ; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -749,8 +749,8 @@ define hidden void @pointer_phi_v4i32_uf2(ptr noalias nocapture readonly %A, ptr ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER5]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 @@ -816,10 +816,10 @@ define hidden void @pointer_phi_v4i32_uf4(ptr noalias nocapture readonly %A, ptr ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER7]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER8]], [[BROADCAST_SPLAT]] @@ -886,18 +886,18 @@ define hidden void @mult_ptr_iv(ptr noalias nocapture readonly %x, ptr noalias n ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP0]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope [[META28:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP0]], i32 1, <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 2 -; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope [[META28]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP3]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope [[META28]] -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], +; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP3]], i32 1, <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28]] +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], splat (i8 10) ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]] ; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP4]], <4 x ptr> [[TMP1]], i32 1, <4 x i1> ), !alias.scope [[META31:![0-9]+]], !noalias [[META28]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP4]], <4 x ptr> [[TMP1]], i32 1, <4 x i1> splat (i1 true)), !alias.scope [[META31:![0-9]+]], !noalias [[META28]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 2 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP5]], <4 x ptr> [[TMP7]], i32 1, <4 x i1> ), !alias.scope [[META31]], !noalias [[META28]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP6]], <4 x ptr> [[TMP8]], i32 1, <4 x i1> ), !alias.scope [[META31]], !noalias [[META28]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP5]], <4 x ptr> [[TMP7]], i32 1, <4 x i1> splat (i1 true)), !alias.scope [[META31]], !noalias [[META28]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP6]], <4 x ptr> [[TMP8]], i32 1, <4 x i1> splat (i1 true)), !alias.scope [[META31]], !noalias [[META28]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12 ; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI5]], i32 12 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll index 7db5bccd896b27..6f7b45beeb6e7a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll @@ -16,8 +16,8 @@ define arm_aapcs_vfpcc i32 @minmaxval4(ptr nocapture readonly %x, ptr nocapture ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 2147483647), [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI1]]) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index ffeca069dc0ea4..0bac1630673067 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -25,7 +25,7 @@ define void @trunc_not_allowed_different_vec_elemns(ptr noalias nocapture %A, pt ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = shl <4 x i16> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = shl <4 x i16> [[TMP8]], splat (i16 1) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[D:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP11]], align 2 @@ -54,7 +54,7 @@ define void @trunc_not_allowed_different_vec_elemns(ptr noalias nocapture %A, pt ; CHECK-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX8]], align 2 ; CHECK-NEXT: [[ADD9]] = add nuw nsw i32 [[I_021]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD9]], 431 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: br label %for.body @@ -451,7 +451,7 @@ define void @fptrunc_not_allowed(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = fptrunc <4 x float> [[TMP5]] to <4 x half> -; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x half> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x half> [[TMP8]], splat (half 0xH4000) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[D:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <4 x half> [[TMP9]], ptr [[TMP11]], align 2 @@ -537,12 +537,12 @@ define dso_local void @select_not_allowed(ptr noalias nocapture %A, ptr noalias ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x ptr> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, <4 x ptr> [[TMP4]], <4 x i32> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <4 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -611,7 +611,7 @@ define i32 @i32_smin_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 2147483647), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -680,7 +680,7 @@ define i32 @i32_smax_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -749,7 +749,7 @@ define i32 @i32_umin_reduction(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index 4888f3d6383c85..55fd3530806d25 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -33,13 +33,13 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP49:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP50:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <2 x i64> [[STEP_ADD4]], -; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <2 x i64> [[STEP_ADD5]], -; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <2 x i64> [[STEP_ADD6]], -; CHECK-NEXT: [[STEP_ADD8:%.*]] = add <2 x i64> [[STEP_ADD7]], -; CHECK-NEXT: [[STEP_ADD9:%.*]] = add <2 x i64> [[STEP_ADD8]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_4:%.*]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_5:%.*]] = add <2 x i64> [[STEP_ADD_4]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_6:%.*]] = add <2 x i64> [[STEP_ADD_5]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_7:%.*]] = add <2 x i64> [[STEP_ADD_6]], splat (i64 2) ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 @@ -66,14 +66,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[TMP24:%.*]] = zext <2 x i8> [[WIDE_LOAD29]] to <2 x i64> ; CHECK-NEXT: [[TMP25:%.*]] = zext <2 x i8> [[WIDE_LOAD30]] to <2 x i64> ; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[WIDE_LOAD31]] to <2 x i64> -; CHECK-NEXT: [[TMP27:%.*]] = shl <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP28:%.*]] = shl <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP29:%.*]] = shl <2 x i64> [[STEP_ADD4]], -; CHECK-NEXT: [[TMP30:%.*]] = shl <2 x i64> [[STEP_ADD5]], -; CHECK-NEXT: [[TMP31:%.*]] = shl <2 x i64> [[STEP_ADD6]], -; CHECK-NEXT: [[TMP32:%.*]] = shl <2 x i64> [[STEP_ADD7]], -; CHECK-NEXT: [[TMP33:%.*]] = shl <2 x i64> [[STEP_ADD8]], -; CHECK-NEXT: [[TMP34:%.*]] = shl <2 x i64> [[STEP_ADD9]], +; CHECK-NEXT: [[TMP27:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP28:%.*]] = shl <2 x i64> [[STEP_ADD]], splat (i64 1) +; CHECK-NEXT: [[TMP29:%.*]] = shl <2 x i64> [[STEP_ADD_2]], splat (i64 1) +; CHECK-NEXT: [[TMP30:%.*]] = shl <2 x i64> [[STEP_ADD_3]], splat (i64 1) +; CHECK-NEXT: [[TMP31:%.*]] = shl <2 x i64> [[STEP_ADD_4]], splat (i64 1) +; CHECK-NEXT: [[TMP32:%.*]] = shl <2 x i64> [[STEP_ADD_5]], splat (i64 1) +; CHECK-NEXT: [[TMP33:%.*]] = shl <2 x i64> [[STEP_ADD_6]], splat (i64 1) +; CHECK-NEXT: [[TMP34:%.*]] = shl <2 x i64> [[STEP_ADD_7]], splat (i64 1) ; CHECK-NEXT: [[TMP35:%.*]] = shl <2 x i64> [[TMP19]], [[TMP27]] ; CHECK-NEXT: [[TMP36:%.*]] = shl <2 x i64> [[TMP20]], [[TMP28]] ; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i64> [[TMP21]], [[TMP29]] @@ -91,7 +91,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[TMP49]] = or <2 x i64> [[TMP41]], [[VEC_PHI16]] ; CHECK-NEXT: [[TMP50]] = or <2 x i64> [[TMP42]], [[VEC_PHI17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD9]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD_7]], splat (i64 2) ; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index a6797dea0836c2..ba9d49fc682c4c 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -372,14 +372,14 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; VF-TWO-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> -; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], -; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], -; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], -; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], -; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], -; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], -; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], -; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], +; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) +; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] ; VF-TWO-CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 0 ; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 @@ -427,7 +427,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -1 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x float>, ptr [[TMP104]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <2 x float> [[WIDE_LOAD23]], <2 x float> poison, <2 x i32> -; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], +; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP97]] ; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, ptr [[TMP106]], i32 0 ; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP105]], ptr [[TMP107]], align 4 @@ -531,14 +531,14 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], -; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], -; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], -; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], -; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], -; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], -; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], -; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], +; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) +; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) ; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] ; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 0 ; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 @@ -586,7 +586,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -3 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, ptr [[TMP104]], align 4 ; VF-FOUR-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x float> [[WIDE_LOAD23]], <4 x float> poison, <4 x i32> -; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], +; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], splat (float 1.000000e+00) ; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP97]] ; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, ptr [[TMP106]], i32 0 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP105]], ptr [[TMP107]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 6c8e3606c53f2f..ab62addb94b9f5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -486,8 +486,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> -; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> +; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[TMP5]] ; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]] ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]] @@ -614,8 +614,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> -; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> +; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[TMP5]] ; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]] ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]] @@ -736,10 +736,10 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], +; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42) +; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42) +; FIXED-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], splat (i64 27) +; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27) ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8 @@ -858,10 +858,10 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], +; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42) +; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42) +; FIXED-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], splat (i64 27) +; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27) ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8 @@ -981,10 +981,10 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32 ; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1 -; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> , <32 x i8> -; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> , <32 x i8> +; FIXED-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD]], splat (i8 -128) +; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], splat (i8 -128) +; FIXED-NEXT: [[TMP6:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1) +; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1) ; FIXED-NEXT: [[TMP8:%.*]] = sdiv <32 x i8> [[WIDE_LOAD]], [[TMP6]] ; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP7]] ; FIXED-NEXT: [[PREDPHI:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> [[TMP8]], <32 x i8> [[WIDE_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 2789ab484e1b66..ad10ddc26fc7a5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -71,8 +71,8 @@ define void @load_store_factor2_i32(ptr %p) { ; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], +; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1) +; FIXED-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2) ; FIXED-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> ; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> ; FIXED-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4 @@ -248,8 +248,8 @@ define void @load_store_factor2_i64(ptr %p) { ; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], +; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1) +; FIXED-NEXT: [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2) ; FIXED-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> ; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> ; FIXED-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 @@ -372,9 +372,9 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], -; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1) +; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2) +; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3) ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> @@ -425,9 +425,9 @@ define void @load_store_factor3_i32(ptr %p) { ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], -; FIXED-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], +; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1) +; FIXED-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2) +; FIXED-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3) ; FIXED-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> ; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> ; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> @@ -478,9 +478,9 @@ define void @load_store_factor3_i32(ptr %p) { ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> -; SCALABLE-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], -; SCALABLE-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], -; SCALABLE-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], +; SCALABLE-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1) +; SCALABLE-NEXT: [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2) +; SCALABLE-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3) ; SCALABLE-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> ; SCALABLE-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> ; SCALABLE-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> @@ -562,9 +562,9 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1) +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3) ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> @@ -615,9 +615,9 @@ define void @load_store_factor3_i64(ptr %p) { ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], -; FIXED-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], +; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1) +; FIXED-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2) +; FIXED-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3) ; FIXED-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> ; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> ; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> @@ -668,9 +668,9 @@ define void @load_store_factor3_i64(ptr %p) { ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> -; SCALABLE-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], -; SCALABLE-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], -; SCALABLE-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], +; SCALABLE-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1) +; SCALABLE-NEXT: [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2) +; SCALABLE-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3) ; SCALABLE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> ; SCALABLE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> ; SCALABLE-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> @@ -757,14 +757,14 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], -; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], -; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], -; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], -; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], -; CHECK-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1) +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2) +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3) +; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4) +; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5) +; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6) +; CHECK-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7) +; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8) ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> @@ -849,14 +849,14 @@ define void @load_store_factor8(ptr %p) { ; FIXED-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; FIXED-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; FIXED-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> -; FIXED-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], -; FIXED-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], -; FIXED-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], -; FIXED-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], -; FIXED-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], -; FIXED-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], -; FIXED-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], -; FIXED-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], +; FIXED-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1) +; FIXED-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2) +; FIXED-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3) +; FIXED-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4) +; FIXED-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5) +; FIXED-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6) +; FIXED-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7) +; FIXED-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8) ; FIXED-NEXT: [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> ; FIXED-NEXT: [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> ; FIXED-NEXT: [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> @@ -941,14 +941,14 @@ define void @load_store_factor8(ptr %p) { ; SCALABLE-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> -; SCALABLE-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], -; SCALABLE-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], -; SCALABLE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], -; SCALABLE-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], -; SCALABLE-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], -; SCALABLE-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], -; SCALABLE-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], -; SCALABLE-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], +; SCALABLE-NEXT: [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1) +; SCALABLE-NEXT: [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2) +; SCALABLE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3) +; SCALABLE-NEXT: [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4) +; SCALABLE-NEXT: [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5) +; SCALABLE-NEXT: [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6) +; SCALABLE-NEXT: [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7) +; SCALABLE-NEXT: [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8) ; SCALABLE-NEXT: [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> ; SCALABLE-NEXT: [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> ; SCALABLE-NEXT: [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index ec50b0cac03829..07a5c53894cdec 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -261,7 +261,7 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 @@ -322,7 +322,7 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = shl <32 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <32 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 @@ -382,7 +382,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index f63b1b73dc522b..02b7ce3795ad52 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -9,8 +9,8 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], splat (i64 48) +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], splat (i64 52) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer @@ -21,10 +21,10 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3) -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]] @@ -54,7 +54,7 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll index 7f4eb387a1ece6..95eb67b3823ee6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: foo ; CHECK: LV: IC is 2 -; CHECK: %{{.*}} = add <8 x i32> %{{.*}}, +; CHECK: %{{.*}} = add <8 x i32> %{{.*}}, splat (i32 8) ; CHECK: %{{.*}} = add {{.*}}, 16 ; Function Attrs: nofree norecurse nosync nounwind writeonly diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index 8388d018b89e82..d51426316b1e3c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -21,7 +21,7 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -96,7 +96,7 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -168,8 +168,8 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -239,8 +239,8 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -310,8 +310,8 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -406,11 +406,11 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 35) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x i32> poison) -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2) ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP8]] ; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP9]], <4 x i1> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index fa13cd8f19ae56..d0e14e5ba44c7b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -162,7 +162,7 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = trunc <2 x i8> [[BROADCAST_SPLAT]] to <2 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> , [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> splat (i1 true), [[TMP0]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x ptr> poison, ptr [[DST]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT3]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 06a4f98d3dc726..fadaf54255a77d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -379,10 +379,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], -; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) +; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10) ; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP1]], <4 x i64> poison) ; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison) ; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer @@ -393,7 +393,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXEDLEN: middle.block: @@ -490,7 +490,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison) ; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer @@ -498,7 +498,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -1184,10 +1184,10 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], -; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) +; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10) ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP1]]) ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]]) ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] @@ -1196,7 +1196,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; FIXEDLEN: middle.block: @@ -1294,14 +1294,14 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]]) ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-FIXEDLEN: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll index e594fce5a7a799..06d0eb3fc203d7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll @@ -154,12 +154,12 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; NO-VP: vector.ph: ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> , i32 [[START:%.*]], i32 0 +; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] ; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 @@ -1223,12 +1223,12 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; NO-VP: vector.ph: ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x float> , float [[START:%.*]], i32 0 +; NO-VP-NEXT: [[TMP0:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0 ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP1]] ; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll index 99a7d1d34f26d8..e7fdfbcf76caa4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll @@ -25,8 +25,8 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 4, <2 x i1> ), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] -; CHECK-NEXT: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 4, <2 x i1> ), !alias.scope [[META3]], !noalias [[META0]] +; CHECK-NEXT: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 4, <2 x i1> splat (i1 true)), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 4, <2 x i1> splat (i1 true)), !alias.scope [[META3]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll index 3477c8d879106b..107ae504612968 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll @@ -16,7 +16,7 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IV]], splat (i64 8) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll index 5e8f287217478b..f828b74bc8bf69 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll @@ -9,13 +9,13 @@ define void @test(ptr %p, i40 %a) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], splat (i40 24) +; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], splat (i40 28) ; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1> ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -23,7 +23,7 @@ define void @test(ptr %p, i40 %a) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT3]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], splat (i32 9) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index 9a1e613a736bf3..699b8487a065a5 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -22,7 +22,7 @@ define void @func_21() { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -59,7 +59,7 @@ define void @func_21() { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -78,7 +78,7 @@ define void @func_21() { ; CHECK-NEXT: store i32 [[SCALAR_RECUR]], ptr [[B_PTR]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index bb35806cea20a1..64e57747bf2ae1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -36,9 +36,9 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <80 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]] -; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP5]], <16 x ptr> [[TMP3]], i32 4, <16 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP5]], <16 x ptr> [[TMP3]], i32 4, <16 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 80) ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body %data = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float] } @@ -87,7 +87,7 @@ attributes #0 = { "target-cpu"="knl" } ; FORCE: vector.body: ; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ] -; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], +; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 2) ; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FORCE: pred.store.if: @@ -103,7 +103,7 @@ attributes #0 = { "target-cpu"="knl" } ; FORCE-NEXT: br label [[PRED_STORE_CONTINUE4]] ; FORCE: pred.store.continue2: ; FORCE-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; FORCE-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 ; FORCE-NEXT: br i1 [[TMP15]], label {{%.*}}, label [[VECTOR_BODY]] ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 0418f93498bc20..3363aea39e73f6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -30,7 +30,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <32 x i8> [[VEC_IND]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[VEC_IND]], splat (i8 32) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -61,7 +61,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <16 x i8> [[VEC_IND9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX8]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT10]] = add <16 x i8> [[VEC_IND9]], +; CHECK-NEXT: [[VEC_IND_NEXT10]] = add <16 x i8> [[VEC_IND9]], splat (i8 16) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -118,15 +118,15 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <2 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 9, [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[STEP_ADD]], splat (i64 3) +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[STEP_ADD_2]], splat (i64 3) +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[STEP_ADD_3]], splat (i64 3) ; CHECK-NEXT: [[TMP12:%.*]] = sitofp <2 x i64> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP13:%.*]] = sitofp <2 x i64> [[TMP9]] to <2 x float> ; CHECK-NEXT: [[TMP14:%.*]] = sitofp <2 x i64> [[TMP10]] to <2 x float> @@ -141,7 +141,7 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[TMP22]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[TMP23]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll index 6060a7318deb64..8b47aee6bf389b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll @@ -15,10 +15,10 @@ define i64 @test_foldable_live_in_via_scev() { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = mul <2 x i64> [[VEC_PHI]], -; CHECK-NEXT: [[TMP1]] = mul <2 x i64> [[VEC_PHI1]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0]] = mul <2 x i64> [[VEC_PHI]], splat (i64 2) +; CHECK-NEXT: [[TMP1]] = mul <2 x i64> [[VEC_PHI1]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -73,8 +73,8 @@ define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[STEP_ADD]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], zeroinitializer @@ -88,8 +88,8 @@ define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-NEXT: [[TMP10]] = or <2 x i64> [[TMP8]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP11]] = or <2 x i64> [[TMP9]], [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[STEP_ADD4]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[STEP_ADD4]], splat (i32 2) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 29e54fabad0c1b..6f67d1e283e395 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -426,8 +426,8 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x ptr> [[TMP20]], ptr [[TMP19]], i32 1 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq <2 x ptr> [[TMP17]], zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq <2 x ptr> [[TMP21]], zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[TMP22]], -; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP23]], +; CHECK-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[TMP22]], splat (i1 true) +; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP23]], splat (i1 true) ; CHECK-NEXT: [[TMP26]] = or <2 x i1> [[VEC_PHI]], [[TMP24]] ; CHECK-NEXT: [[TMP27]] = or <2 x i1> [[VEC_PHI3]], [[TMP25]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -782,10 +782,10 @@ define i64 @cost_assume(ptr %end, i64 %N) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7]] = add <2 x i64> [[VEC_PHI]], -; CHECK-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI2]], -; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI3]], -; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI4]], +; CHECK-NEXT: [[TMP7]] = add <2 x i64> [[VEC_PHI]], splat (i64 1) +; CHECK-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI2]], splat (i64 1) +; CHECK-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI3]], splat (i64 1) +; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI4]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]]) @@ -851,13 +851,13 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i1> [[BROADCAST_SPLAT]] to <4 x i64> -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[TMP0]], splat (i64 12) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11]] = and <4 x i32> [[VEC_PHI]], [[TMP2]] ; CHECK-NEXT: [[TMP12]] = and <4 x i32> [[VEC_PHI1]], [[TMP2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -932,7 +932,7 @@ define i64 @live_in_known_1_via_scev() { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 5) ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 @@ -986,7 +986,7 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i64> ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer @@ -994,7 +994,7 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] @@ -1051,8 +1051,8 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], +; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP2]] to <16 x i1> diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll index 4c3c2e89cd204b..c861aa8172b9b8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll @@ -156,7 +156,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP29]], i32 8, <4 x i1> [[TMP8]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 7b7b36e15cd5b2..f6f77e274273e3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -26,7 +26,7 @@ define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0 @@ -69,7 +69,7 @@ define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0 entry: @@ -107,9 +107,9 @@ define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP7]], i32 4, <4 x i1> [[TMP8]], <4 x float> poison), !invariant.load !0 entry: @@ -149,9 +149,9 @@ define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]] -; CHECK: [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK: [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP12]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0 @@ -192,8 +192,8 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 @@ -231,10 +231,10 @@ define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = sdiv i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP9]], i32 0 @@ -286,12 +286,12 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison) -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x double> zeroinitializer, <4 x double> [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3 ; CHECK-NEXT: store double [[TMP6]], ptr [[P1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: @@ -355,11 +355,11 @@ define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], -; CHECK-NEXT: [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP8]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP9]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0 ; @@ -401,8 +401,8 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , {{.*}} ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 @@ -495,7 +495,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_SREM_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]] ; CHECK: pred.srem.if: @@ -530,7 +530,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0 ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: br i1 true, label %middle.block, label %vector.body ; CHECK: middle.block: @@ -574,12 +574,12 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr %C, i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr %B, i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> , <4 x float> [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], splat (float 2.000000e+00) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> splat (float 3.300000e+01), <4 x float> [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index f3875ccb41668c..9092da6092595a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -40,15 +40,15 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[OFFSET_IDX]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -1 -; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP14:%.*]] = lshr exact <16 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i32> [[VEC_IND]], splat (i32 196608) +; CHECK-NEXT: [[TMP14:%.*]] = lshr exact <16 x i32> [[TMP13]], splat (i32 16) ; CHECK-NEXT: [[TMP15:%.*]] = trunc <16 x i32> [[TMP14]] to <16 x i16> ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[ARR:%.*]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP17]], i32 0 ; CHECK-NEXT: store <16 x i16> [[TMP15]], ptr [[TMP18]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -77,15 +77,15 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[OFFSET_IDX15]] to i32 ; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], 0 ; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], -1 -; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i32> [[VEC_IND13]], -; CHECK-NEXT: [[TMP25:%.*]] = lshr exact <8 x i32> [[TMP24]], +; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i32> [[VEC_IND13]], splat (i32 196608) +; CHECK-NEXT: [[TMP25:%.*]] = lshr exact <8 x i32> [[TMP24]], splat (i32 16) ; CHECK-NEXT: [[TMP26:%.*]] = trunc <8 x i32> [[TMP25]] to <8 x i16> ; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP23]] to i64 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[TMP28]], i32 0 ; CHECK-NEXT: store <8 x i16> [[TMP26]], ptr [[TMP29]], align 2 ; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX9]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT14]] = add <8 x i32> [[VEC_IND13]], +; CHECK-NEXT: [[VEC_IND_NEXT14]] = add <8 x i32> [[VEC_IND13]], splat (i32 8) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -145,92 +145,90 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[L]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLAT3:%.*]] = mul <16 x i16> , [[TMP2]] - +; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <16 x i16> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer - +; CHECK-NEXT: [[TMP21:%.*]] = mul <16 x i16> , [[DOTSPLAT]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP21]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16 ; CHECK-NEXT: [[IND_END:%.*]] = mul i16 [[DOTCAST]], [[TMP0]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i16> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i16> [[VEC_IND]], [[DOTSPLAT3]] -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <16 x i16> [[STEP_ADD]], [[DOTSPLAT3]] -; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <16 x i16> [[STEP_ADD4]], [[DOTSPLAT3]] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i16> [[VEC_IND]], [[TMP1]] +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <16 x i16> [[STEP_ADD]], [[TMP1]] +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <16 x i16> [[STEP_ADD_2]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = sub <16 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP9:%.*]] = sub <16 x i16> [[STEP_ADD4]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP10:%.*]] = sub <16 x i16> [[STEP_ADD5]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 16 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 32 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 48 -; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP15]], align 2 -; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP16]], align 2 -; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr [[TMP17]], align 2 -; CHECK-NEXT: store <16 x i16> [[TMP10]], ptr [[TMP18]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i16> [[VEC_IND]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP5:%.*]] = sub <16 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 32 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 48 +; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[TMP9]], align 2 +; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP10]], align 2 +; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP11]], align 2 +; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD5]], [[DOTSPLAT3]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD_3]], [[TMP1]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[L]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[DOTCAST12:%.*]] = trunc i64 [[N_VEC]] to i16 -; CHECK-NEXT: [[IND_END13:%.*]] = mul i16 [[DOTCAST12]], [[TMP0]] +; CHECK-NEXT: [[DOTCAST9:%.*]] = trunc i64 [[N_VEC]] to i16 +; CHECK-NEXT: [[IND_END10:%.*]] = mul i16 [[DOTCAST9]], [[TMP0]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[L]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF7:%.*]] = urem i64 [[L]], 8 -; CHECK-NEXT: [[N_VEC8:%.*]] = sub i64 [[L]], [[N_MOD_VF7]] -; CHECK-NEXT: [[DOTCAST10:%.*]] = trunc i64 [[N_VEC8]] to i16 -; CHECK-NEXT: [[IND_END11:%.*]] = mul i16 [[DOTCAST10]], [[TMP0]] -; CHECK-NEXT: [[DOTSPLATINSERT17:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0 -; CHECK-NEXT: [[DOTSPLAT18:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT17]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT19:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT20:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT19]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = mul <8 x i16> , [[DOTSPLAT20]] -; CHECK-NEXT: [[INDUCTION21:%.*]] = add <8 x i16> [[DOTSPLAT18]], [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = mul i16 [[TMP0]], 8 -; CHECK-NEXT: [[DOTSPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP21]], i64 0 -; CHECK-NEXT: [[DOTSPLAT23:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT27]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[L]], 8 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[L]], [[N_MOD_VF4]] +; CHECK-NEXT: [[DOTCAST7:%.*]] = trunc i64 [[N_VEC5]] to i16 +; CHECK-NEXT: [[IND_END8:%.*]] = mul i16 [[DOTCAST7]], [[TMP0]] +; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT13]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 +; CHECK-NEXT: [[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i16> , [[DOTSPLAT16]] +; CHECK-NEXT: [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT14]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = mul i16 [[TMP0]], 8 +; CHECK-NEXT: [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0 +; CHECK-NEXT: [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX16:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND24:%.*]] = phi <8 x i16> [ [[INDUCTION21]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT26:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX16]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = sub <8 x i16> [[VEC_IND24]], [[BROADCAST_SPLAT28]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[TMP24]], i32 0 -; CHECK-NEXT: store <8 x i16> [[TMP23]], ptr [[TMP25]], align 2 -; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX16]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT26]] = add <8 x i16> [[VEC_IND24]], [[DOTSPLAT23]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC8]] -; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i16> [ [[INDUCTION17]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX12]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0 +; CHECK-NEXT: store <8 x i16> [[TMP17]], ptr [[TMP19]], align 2 +; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[L]], [[N_VEC8]] -; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N25:%.*]] = icmp eq i64 [[L]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N25]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i64 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i16 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i16 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL9]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[P_09:%.*]] = phi i16 [ [[BC_RESUME_VAL14]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[P_09:%.*]] = phi i16 [ [[BC_RESUME_VAL11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ADD]] = sub i16 [[P_09]], [[OFF]] ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[IV]] ; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2 @@ -240,6 +238,8 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK: exit: ; CHECK-NEXT: ret void ; + + entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index ca38d274776007..0e511cfc9bffed 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -225,12 +225,12 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP1]] = sub nsw <2 x i64> zeroinitializer, [[STEP_ADD]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 ; CHECK-NEXT: store i64 [[TMP2]], ptr [[GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 36 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -303,16 +303,16 @@ define void @for_iv_trunc_optimized(ptr %dst) { ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP0]], splat (i32 3) +; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], splat (i32 3) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3 ; CHECK-NEXT: store i32 [[TMP6]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 336 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 54dd9c870a1709..39fa87fbb569f6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -32,9 +32,9 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 4.000000e+00) +; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 8.000000e+00) +; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 1.200000e+01) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32 ; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 64 @@ -44,7 +44,7 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD2]], ptr [[TMP3]], align 4 ; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD3]], ptr [[TMP4]], align 4 ; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <8 x float> [[VEC_IND]], +; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <8 x float> [[VEC_IND]], splat (float 1.600000e+01) ; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AUTO_VEC: middle.block: @@ -207,9 +207,9 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 1.200000e+01) +; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 2.400000e+01) +; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 3.600000e+01) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32 ; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 64 @@ -219,7 +219,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8 ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], ptr [[TMP4]], align 8 ; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], splat (double 4.800000e+01) ; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AUTO_VEC: middle.block: @@ -371,9 +371,9 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC: vector.body: ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <8 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], -; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], +; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], splat (float 3.360000e+02) +; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], splat (float 3.360000e+02) +; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 [[INDEX]] ; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 32 ; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 64 @@ -391,7 +391,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: store <8 x float> [[TMP8]], ptr [[TMP4]], align 4 ; AUTO_VEC-NEXT: store <8 x float> [[TMP9]], ptr [[TMP5]], align 4 ; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[STEP_ADD3]], +; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[STEP_ADD3]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; AUTO_VEC: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 6516b05ab4ede9..cc190fb826911e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -38,7 +38,7 @@ define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; AVX512-NEXT: [[TMP6:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD]] to <16 x i64> ; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], <16 x i64> [[TMP6]] ; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP7]], i32 4, <16 x i1> [[TMP3]], <16 x float> poison) -; AVX512-NEXT: [[TMP8:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER]], +; AVX512-NEXT: [[TMP8:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[OUT:%.*]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[TMP9]], i32 0 ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP8]], ptr [[TMP10]], i32 4, <16 x i1> [[TMP3]]) @@ -64,7 +64,7 @@ define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: [[TMP6:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD]] to <2 x i64> ; FVW2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], <2 x i64> [[TMP6]] ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP7]], i32 4, <2 x i1> [[TMP3]], <2 x float> poison) -; FVW2-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[OUT:%.*]], i64 [[TMP0]] ; FVW2-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[TMP9]], i32 0 ; FVW2-NEXT: call void @llvm.masked.store.v2f32.p0(<2 x float> [[TMP8]], ptr [[TMP10]], i32 4, <2 x i1> [[TMP3]]) @@ -124,15 +124,15 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; AVX512: for.end: @@ -156,7 +156,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -174,7 +174,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FVW2: for.end: @@ -231,15 +231,15 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER1]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER1]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AVX512: for.end: @@ -263,7 +263,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -281,7 +281,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE2]] ; FVW2: pred.store.continue2: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FVW2: for.end: @@ -325,15 +325,15 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1(<16 x ptr addrspace(1)> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; AVX512: for.end: @@ -357,7 +357,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1(<2 x ptr addrspace(1)> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -375,7 +375,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FVW2: for.end: @@ -418,15 +418,15 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1(<16 x ptr addrspace(1)> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AVX512: for.end: @@ -450,7 +450,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1(<2 x ptr addrspace(1)> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -468,7 +468,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FVW2: for.end: @@ -511,15 +511,15 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> , <16 x i32> poison) +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) ; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer ; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1 ; AVX512-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison) -; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], +; AVX512-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01) ; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> [[TMP4]], i32 4, <16 x i1> [[TMP1]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256) ; AVX512-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; AVX512-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; AVX512: for.end: @@ -543,7 +543,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer ; FVW2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1 ; FVW2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison) -; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], +; FVW2-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01) ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: @@ -561,7 +561,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE3]] ; FVW2: pred.store.continue3: ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 -; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32) ; FVW2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; FVW2-NEXT: br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; FVW2: for.end: @@ -652,11 +652,11 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]] ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP18]], align 4, !alias.scope [[META8:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> [[TMP14]], i32 4, <16 x i1> ), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> [[TMP14]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] ; AVX512-NEXT: [[TMP19:%.*]] = getelementptr float, ptr [[TMP16]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP19]], align 4, !alias.scope [[META15:![0-9]+]] ; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, <16 x ptr> [[TMP14]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD8]], <16 x ptr> [[TMP20]], i32 4, <16 x i1> ), !alias.scope [[META11]], !noalias [[META13]] +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD8]], <16 x ptr> [[TMP20]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX512-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 1024 ; AVX512-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -691,12 +691,12 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM]] ; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD22:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD22]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> ), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD23:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD23]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]] ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr float, ptr [[TMP28]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD23:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD24:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]] ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP26]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD23]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> ), !alias.scope [[META20]], !noalias [[META22]] +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD24]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20]], !noalias [[META22]] ; AVX512-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX18]], 8 ; AVX512-NEXT: [[PTR_IND20]] = getelementptr i8, ptr [[POINTER_PHI19]], i64 512 ; AVX512-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC10]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll index 637b985b4562ed..a90b4df3999f2a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll @@ -18,8 +18,8 @@ define void @gep_use_in_dead_block(ptr noalias %dst, ptr %src) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 10) +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP2]] @@ -97,13 +97,13 @@ define void @gep_use_outside_loop(ptr noalias %dst, ptr %src) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 10) +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP6]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <4 x i1> [[TMP5]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index 8200e7df2a2602..5ba559af077ca6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -31,9 +31,9 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]] ; CHECK: for.end.us: ; CHECK-NEXT: [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV33:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access [[META0:![0-9]+]] ; CHECK-NEXT: [[ADD10_US:%.*]] = add nsw i32 [[TMP3]], 3 -; CHECK-NEXT: store i32 [[ADD10_US]], ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: store i32 [[ADD10_US]], ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access [[META0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32 ; CHECK-NEXT: [[EXITCOND36:%.*]] = icmp eq i32 [[LFTR_WIDEIV35]], [[M]] @@ -44,9 +44,9 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP4]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD4_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access [[META0]] ; CHECK-NEXT: [[ADD5_US:%.*]] = add nsw i32 [[TMP5]], 1 -; CHECK-NEXT: store i32 [[ADD5_US]], ptr [[ARRAYIDX7_US:%.*]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: store i32 [[ADD5_US]], ptr [[ARRAYIDX7_US:%.*]], align 4, !llvm.mem.parallel_loop_access [[META0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT30]] = add i64 [[INDVARS_IV29]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32 ; CHECK-NEXT: [[EXITCOND32:%.*]] = icmp eq i32 [[LFTR_WIDEIV31]], [[M]] @@ -77,9 +77,9 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP17]], i32 3 -; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index 0c2ec268ad2070..2dac60a0fcfcba 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -80,8 +80,8 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i32 2 ; SSE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; SSE-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 -; SSE-NEXT: [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], -; SSE-NEXT: [[TMP7:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD2]], +; SSE-NEXT: [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], splat (double 4.200000e+01) +; SSE-NEXT: [[TMP7:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD2]], splat (double 4.200000e+01) ; SSE-NEXT: [[TMP8:%.*]] = fadd fast <2 x double> [[VEC_PHI]], [[WIDE_LOAD]] ; SSE-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[VEC_PHI1]], [[WIDE_LOAD2]] ; SSE-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP6]], <2 x double> [[TMP8]], <2 x double> [[VEC_PHI]] @@ -139,10 +139,10 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x double>, ptr [[TMP9]], align 8 ; AVX-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x double>, ptr [[TMP10]], align 8 ; AVX-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP11]], align 8 -; AVX-NEXT: [[TMP12:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD]], -; AVX-NEXT: [[TMP13:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD4]], -; AVX-NEXT: [[TMP14:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD5]], -; AVX-NEXT: [[TMP15:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD6]], +; AVX-NEXT: [[TMP12:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD]], splat (double 4.200000e+01) +; AVX-NEXT: [[TMP13:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD4]], splat (double 4.200000e+01) +; AVX-NEXT: [[TMP14:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD5]], splat (double 4.200000e+01) +; AVX-NEXT: [[TMP15:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD6]], splat (double 4.200000e+01) ; AVX-NEXT: [[TMP16:%.*]] = fadd fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]] ; AVX-NEXT: [[TMP17:%.*]] = fadd fast <4 x double> [[VEC_PHI1]], [[WIDE_LOAD4]] ; AVX-NEXT: [[TMP18:%.*]] = fadd fast <4 x double> [[VEC_PHI2]], [[WIDE_LOAD5]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index c0098eb533c00d..f415a81d3b7c73 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -18,23 +18,23 @@ define i32 @iv_used_widened_and_truncated(ptr %dst, i64 %N) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], -; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], -; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], splat (i32 8) +; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], splat (i32 8) +; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], splat (i32 8) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD2]] -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VEC_IND4]], <8 x ptr> [[TMP1]], i32 8, <8 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD5]], <8 x ptr> [[TMP2]], i32 8, <8 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD6]], <8 x ptr> [[TMP3]], i32 8, <8 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD7]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VEC_IND4]], <8 x ptr> [[TMP1]], i32 8, <8 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD5]], <8 x ptr> [[TMP2]], i32 8, <8 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD6]], <8 x ptr> [[TMP3]], i32 8, <8 x i1> splat (i1 true)) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD7]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], -; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <8 x i32> [[STEP_ADD7]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <8 x i32> [[STEP_ADD7]], splat (i32 8) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -88,24 +88,24 @@ define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) +; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> splat (i16 10) +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4 -; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP6]], align 2, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] -; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP7]], align 2, !alias.scope [[META4]], !noalias [[META7]] +; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP6]], align 2, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP7]], align 2, !alias.scope [[META4]], !noalias [[META7]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4 ; CHECK-NEXT: store <4 x i32> [[VEC_IND3]], ptr [[TMP10]], align 4, !alias.scope [[META7]] ; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP11]], align 4, !alias.scope [[META7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], -; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) +; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], splat (i32 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: @@ -158,18 +158,18 @@ define void @truncated_ivs_with_wide_and_scalar_uses(i1 %c, ptr %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i16> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i16> [[VEC_IND]], splat (i16 8) ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> splat (i16 10) +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i32 8 ; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP6]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[STEP_ADD]], splat (i16 8) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: @@ -273,7 +273,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = zext <16 x i8> [[WIDE_LOAD24]] to <16 x i32> ; CHECK-NEXT: [[TMP22]] = add <16 x i32> [[TMP19]], [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP22]], <16 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = lshr <16 x i32> [[TMP23]], +; CHECK-NEXT: [[TMP24:%.*]] = lshr <16 x i32> [[TMP23]], splat (i32 1) ; CHECK-NEXT: [[TMP25:%.*]] = trunc <16 x i32> [[TMP24]] to <16 x i8> ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP25]], i32 0 ; CHECK-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP]], align 1, !alias.scope [[META16:![0-9]+]], !noalias [[META13]] @@ -382,11 +382,11 @@ define i16 @iv_and_step_trunc() { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16> ; CHECK-NEXT: [[TMP2]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -441,18 +441,18 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <8 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], -; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], -; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[VEC_IND]] ; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD]] ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD1]] @@ -471,7 +471,7 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8) ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -571,7 +571,7 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -665,7 +665,7 @@ define void @wombat(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -739,7 +739,7 @@ define void @wombat2(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -816,7 +816,7 @@ define void @with_dead_use(i32 %arg, ptr %dst) #1 { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index ad0068dc3f6be7..fd89c8b0193415 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -485,7 +485,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> , <4 x i32> poison), !alias.scope [[META6:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]] ; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> @@ -495,7 +495,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> ; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -603,7 +603,7 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -725,7 +725,7 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll index 717dbe359f1050..71b3f296e6dfbf 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll @@ -7,8 +7,8 @@ target triple = "x86_64-apple-macos" ; that store into the last store (by creating an interleaved store group). This ; means the loaded %l2 will have incorrect value. define void @avoid_sinking_store_across_load(ptr %arr) { -; CHECK-LABEL: define void @avoid_sinking_store_across_load -; CHECK-SAME: (ptr [[ARR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-LABEL: define void @avoid_sinking_store_across_load( +; CHECK-SAME: ptr [[ARR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -24,22 +24,22 @@ define void @avoid_sinking_store_across_load(ptr %arr) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[VEC_IND2]] -; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP8]], <4 x ptr> [[TMP7]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0 -; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[VEC_IND2]] +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[STRIDED_VEC]], splat (i32 25) +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP7]], <4 x ptr> [[TMP6]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP11]], <4 x ptr> [[TMP5]], i32 4, <4 x i1> ) +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP9]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 12) +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 8e934dcfae5510..1433e48690bc60 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -31,9 +31,9 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], splat (i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] @@ -57,7 +57,7 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[TMP18]] = add <4 x i32> [[VEC_PHI5]], [[PREDPHI11]] ; CHECK-NEXT: [[TMP19]] = add <4 x i32> [[VEC_PHI6]], [[PREDPHI12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], splat (i64 4) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll index ecae00807f4e72..e945ad40039af8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll @@ -17,18 +17,18 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <8 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], -; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], -; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[VEC_IND]] ; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD]] ; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD1]] @@ -47,7 +47,7 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8) ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -181,8 +181,8 @@ define void @test_scalar_cost_single_store_loop_varying_cond(ptr %dst, ptr noali ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i32> [[WIDE_VEC4]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], splat (i32 123) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], splat (i32 123) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP10]], i32 4, <4 x i1> [[TMP8]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index d2600cd59b6dfe..11383aad4e90bd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -39,7 +39,7 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 -; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], +; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> poison) @@ -102,10 +102,10 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], -; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], -; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], +; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]] ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 8 @@ -185,10 +185,10 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 ; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4 -; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], -; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], -; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], +; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]] ; AVX512-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 @@ -227,7 +227,7 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr [[TMP29]], align 4 -; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], +; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100) ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[TMP31]], i32 0 ; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP32]], i32 4, <8 x i1> [[TMP30]], <8 x i32> poison) @@ -314,7 +314,7 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 0 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP4]], align 4 -; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], +; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP6]], i32 0 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> poison) @@ -377,10 +377,10 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP6]], align 4 ; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP7]], align 4 -; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], -; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], -; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], +; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]] ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 0 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 8 @@ -460,10 +460,10 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP6]], align 4 ; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP7]], align 4 -; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], -; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], -; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], +; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]] ; AVX512-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 0 ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 @@ -502,7 +502,7 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP28]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP29]], align 4 -; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], +; AVX512-NEXT: [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100) ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP31]], i32 0 ; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP32]], i32 4, <8 x i1> [[TMP30]], <8 x i32> poison) @@ -598,7 +598,7 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 -; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], +; AVX1-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX1-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]] ; AVX1-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 0 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x float> poison) @@ -663,10 +663,10 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], -; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], -; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], +; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]] ; AVX2-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i32 0 ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 8 @@ -751,10 +751,10 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 ; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4 -; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], -; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], -; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], +; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100) +; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]] ; AVX512-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i32 0 ; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 @@ -797,7 +797,7 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP31]] ; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0 ; AVX512-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4 -; AVX512-NEXT: [[TMP34:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], +; AVX512-NEXT: [[TMP34:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100) ; AVX512-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP31]] ; AVX512-NEXT: [[TMP36:%.*]] = getelementptr float, ptr [[TMP35]], i32 0 ; AVX512-NEXT: [[WIDE_MASKED_LOAD13:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP36]], i32 4, <8 x i1> [[TMP34]], <8 x float> poison) @@ -904,10 +904,10 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]] ; AVX-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]] ; AVX-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META8]] -; AVX-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], -; AVX-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], -; AVX-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], -; AVX-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], +; AVX-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) +; AVX-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP0]] ; AVX-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i32 0 ; AVX-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 @@ -992,10 +992,10 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META11]] ; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META11]] ; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META11]] -; AVX512-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], -; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], -; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], +; AVX512-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100) +; AVX512-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) +; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) +; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX512-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i32 0 ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 @@ -1137,9 +1137,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <8 x i64> [[VEC_IND]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP0]], i32 4, <8 x i1> , <8 x i32> poison), !alias.scope [[META21:![0-9]+]] -; AVX512-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], -; AVX512-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], +; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP0]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !alias.scope [[META21:![0-9]+]] +; AVX512-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100) +; AVX512-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], splat (i64 1) ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[B]], <8 x i64> [[TMP2]] ; AVX512-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP3]], i32 8, <8 x i1> [[TMP1]], <8 x double> poison), !alias.scope [[META24:![0-9]+]] ; AVX512-NEXT: [[TMP4:%.*]] = sitofp <8 x i32> [[WIDE_MASKED_GATHER]] to <8 x double> @@ -1147,7 +1147,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[A]], <8 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> [[TMP5]], <8 x ptr> [[TMP6]], i32 8, <8 x i1> [[TMP1]]), !alias.scope [[META26:![0-9]+]], !noalias [[META28:![0-9]+]] ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; AVX512-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; AVX512-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 128) ; AVX512-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624 ; AVX512-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; AVX512: middle.block: @@ -1306,10 +1306,10 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP22]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META21]] ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> -; AVX2-NEXT: [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], -; AVX2-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], -; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], -; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], +; AVX2-NEXT: [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], splat (double 5.000000e-01) +; AVX2-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], splat (double 5.000000e-01) +; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], splat (double 5.000000e-01) +; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP0]] ; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 ; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -3 @@ -1418,10 +1418,10 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP13]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP22]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META34]] ; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> -; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], -; AVX512-NEXT: [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], -; AVX512-NEXT: [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], -; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], +; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], splat (double 5.000000e-01) +; AVX512-NEXT: [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], splat (double 5.000000e-01) +; AVX512-NEXT: [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], splat (double 5.000000e-01) +; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], splat (double 5.000000e-01) ; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0 ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -7 @@ -1527,18 +1527,18 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; AVX1-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], -; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], -; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], -; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], +; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX1-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer ; AVX1-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer ; AVX1-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer ; AVX1-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer -; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], -; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], -; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], -; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], +; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) +; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) ; AVX1-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX1-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 @@ -1552,10 +1552,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX1-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX1-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], -; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], -; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], -; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], +; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) +; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) +; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) +; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) ; AVX1-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer @@ -1565,10 +1565,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 ; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) ; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX1-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX1-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -1627,18 +1627,18 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; AVX2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], -; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], -; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], +; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer ; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer ; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer -; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], -; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], -; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], -; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], +; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) +; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 @@ -1652,10 +1652,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX2-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX2-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], -; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], -; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], -; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], +; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) +; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) +; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) +; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) ; AVX2-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer @@ -1665,10 +1665,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 ; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX2-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -1727,18 +1727,18 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 ; AVX512-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 -; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], -; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], -; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], +; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX512-NEXT: [[TMP10:%.*]] = icmp eq <8 x i8> [[TMP6]], zeroinitializer ; AVX512-NEXT: [[TMP11:%.*]] = icmp eq <8 x i8> [[TMP7]], zeroinitializer ; AVX512-NEXT: [[TMP12:%.*]] = icmp eq <8 x i8> [[TMP8]], zeroinitializer ; AVX512-NEXT: [[TMP13:%.*]] = icmp eq <8 x i8> [[TMP9]], zeroinitializer -; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], -; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], -; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], -; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], +; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], splat (i1 true) +; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], splat (i1 true) +; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], splat (i1 true) +; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], splat (i1 true) ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 @@ -1752,10 +1752,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP24:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX512-NEXT: [[TMP25:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX512-NEXT: [[TMP26:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], -; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], -; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], -; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], +; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], splat (i1 true) +; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], splat (i1 true) +; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], splat (i1 true) +; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], splat (i1 true) ; AVX512-NEXT: [[TMP31:%.*]] = select <8 x i1> [[TMP14]], <8 x i1> [[TMP27]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP15]], <8 x i1> [[TMP28]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer @@ -1765,10 +1765,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 ; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX512-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] @@ -1872,18 +1872,18 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; AVX1-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], -; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], -; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], -; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], +; AVX1-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX1-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX1-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX1-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX1-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer ; AVX1-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer ; AVX1-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer ; AVX1-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer -; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], -; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], -; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], -; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], +; AVX1-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; AVX1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; AVX1-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) +; AVX1-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) ; AVX1-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX1-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 @@ -1897,10 +1897,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX1-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX1-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], -; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], -; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], -; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], +; AVX1-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) +; AVX1-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) +; AVX1-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) +; AVX1-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) ; AVX1-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer @@ -1910,10 +1910,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 ; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) ; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX1-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX1-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -1972,18 +1972,18 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 ; AVX2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], -; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], -; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], +; AVX2-NEXT: [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX2-NEXT: [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX2-NEXT: [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX2-NEXT: [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer ; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer ; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer ; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[TMP9]], zeroinitializer -; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], -; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], -; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], -; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], +; AVX2-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; AVX2-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; AVX2-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) +; AVX2-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4 @@ -1997,10 +1997,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP24:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX2-NEXT: [[TMP25:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX2-NEXT: [[TMP26:%.*]] = icmp eq <4 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], -; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], -; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], -; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], +; AVX2-NEXT: [[TMP27:%.*]] = xor <4 x i1> [[TMP23]], splat (i1 true) +; AVX2-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP24]], splat (i1 true) +; AVX2-NEXT: [[TMP29:%.*]] = xor <4 x i1> [[TMP25]], splat (i1 true) +; AVX2-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[TMP26]], splat (i1 true) ; AVX2-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP27]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer @@ -2010,10 +2010,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4 ; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12 -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> , ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]]) +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]]) ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX2-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -2072,18 +2072,18 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1 ; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 ; AVX512-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1 -; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], -; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], -; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], -; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], +; AVX512-NEXT: [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], splat (i8 1) +; AVX512-NEXT: [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], splat (i8 1) +; AVX512-NEXT: [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], splat (i8 1) +; AVX512-NEXT: [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], splat (i8 1) ; AVX512-NEXT: [[TMP10:%.*]] = icmp eq <8 x i8> [[TMP6]], zeroinitializer ; AVX512-NEXT: [[TMP11:%.*]] = icmp eq <8 x i8> [[TMP7]], zeroinitializer ; AVX512-NEXT: [[TMP12:%.*]] = icmp eq <8 x i8> [[TMP8]], zeroinitializer ; AVX512-NEXT: [[TMP13:%.*]] = icmp eq <8 x i8> [[TMP9]], zeroinitializer -; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], -; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], -; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], -; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], +; AVX512-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP10]], splat (i1 true) +; AVX512-NEXT: [[TMP15:%.*]] = xor <8 x i1> [[TMP11]], splat (i1 true) +; AVX512-NEXT: [[TMP16:%.*]] = xor <8 x i1> [[TMP12]], splat (i1 true) +; AVX512-NEXT: [[TMP17:%.*]] = xor <8 x i1> [[TMP13]], splat (i1 true) ; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN:%.*]], i64 [[TMP0]] ; AVX512-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 0 ; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8 @@ -2097,10 +2097,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP24:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD4]], zeroinitializer ; AVX512-NEXT: [[TMP25:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD5]], zeroinitializer ; AVX512-NEXT: [[TMP26:%.*]] = icmp eq <8 x ptr> [[WIDE_MASKED_LOAD6]], zeroinitializer -; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], -; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], -; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], -; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], +; AVX512-NEXT: [[TMP27:%.*]] = xor <8 x i1> [[TMP23]], splat (i1 true) +; AVX512-NEXT: [[TMP28:%.*]] = xor <8 x i1> [[TMP24]], splat (i1 true) +; AVX512-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP25]], splat (i1 true) +; AVX512-NEXT: [[TMP30:%.*]] = xor <8 x i1> [[TMP26]], splat (i1 true) ; AVX512-NEXT: [[TMP31:%.*]] = select <8 x i1> [[TMP14]], <8 x i1> [[TMP27]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP15]], <8 x i1> [[TMP28]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer @@ -2110,10 +2110,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8 ; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16 ; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24 -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) -; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> , ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]]) +; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]]) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX512-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX512-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index 3226f72d51d2e4..a12aed7239c9df 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -21,12 +21,12 @@ define i32 @foo_optsize() #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], splat (i32 202) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]], <64 x i8> poison) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> , <64 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> splat (i8 2), <64 x i8> splat (i8 1) ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 @@ -60,12 +60,12 @@ define i32 @foo_optsize() #0 { ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i64 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[VEC_IV:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], -; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], +; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], splat (i32 202) ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; AUTOVF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]], <32 x i8> poison) ; AUTOVF-NEXT: [[TMP4:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer -; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> , <32 x i8> +; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 2), <32 x i8> splat (i8 1) ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0(<32 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 @@ -121,12 +121,12 @@ define i32 @foo_minsize() #1 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], splat (i32 202) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]], <64 x i8> poison) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> , <64 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> splat (i8 2), <64 x i8> splat (i8 1) ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 @@ -160,12 +160,12 @@ define i32 @foo_minsize() #1 { ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i64 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer ; AUTOVF-NEXT: [[VEC_IV:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], -; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], +; AUTOVF-NEXT: [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], splat (i32 202) ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; AUTOVF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]], <32 x i8> poison) ; AUTOVF-NEXT: [[TMP4:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer -; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> , <32 x i8> +; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 2), <32 x i8> splat (i8 1) ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0(<32 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]]) ; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 @@ -225,12 +225,12 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <64 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <64 x i32> [[TMP1]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <64 x i32> @llvm.masked.gather.v64i32.v64p0(<64 x ptr> [[TMP2]], i32 4, <64 x i1> , <64 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <64 x i32> @llvm.masked.gather.v64i32.v64p0(<64 x ptr> [[TMP2]], i32 4, <64 x i1> splat (i1 true), <64 x i32> poison) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; CHECK-NEXT: store <64 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], splat (i32 64) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -264,12 +264,12 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; AUTOVF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; AUTOVF-NEXT: [[TMP1:%.*]] = mul nsw <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; AUTOVF-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <8 x i32> [[TMP1]] -; AUTOVF-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> , <8 x i32> poison) +; AUTOVF-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison) ; AUTOVF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; AUTOVF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; AUTOVF-NEXT: store <8 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP4]], align 4 ; AUTOVF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; AUTOVF-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; AUTOVF-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; AUTOVF-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AUTOVF: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 7aa9d54c85472a..6480c0ab1099d7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -27,7 +27,7 @@ ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] @@ -35,15 +35,15 @@ ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], -; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true)) +; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1) +; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8) ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body @@ -56,7 +56,7 @@ ; AVX: %[[VecInd:.*]] = phi <8 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; AVX: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <8 x i64> %[[VecInd]] ; AVX: %[[VecIndTr:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32> -; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[VecIndTr]], <8 x ptr> %[[AAddr]], i32 4, <8 x i1> ) +; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[VecIndTr]], <8 x ptr> %[[AAddr]], i32 4, <8 x i1> splat (i1 true)) ; AVX: %[[VecIndTr2:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32> ; AVX: %[[StoreVal:.*]] = add nsw <8 x i32> %[[VecIndTr2]], %[[Splat]] ; AVX: br label %[[InnerLoop:.+]] @@ -64,14 +64,14 @@ ; AVX: [[InnerLoop]]: ; AVX: %[[InnerPhi:.*]] = phi <8 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; AVX: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <8 x i64> %[[InnerPhi]], <8 x i64> %[[VecInd]] -; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[StoreVal]], <8 x ptr> %[[AAddr2]], i32 4, <8 x i1> %[[InnerPhi]], -; AVX: %[[VecCond:.*]] = icmp eq <8 x i64> %[[InnerPhiNext]], +; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[StoreVal]], <8 x ptr> %[[AAddr2]], i32 4, <8 x i1> splat (i1 true)) +; AVX: %[[InnerPhiNext]] = add nuw nsw <8 x i64> %[[InnerPhi]], splat (i64 1) +; AVX: %[[VecCond:.*]] = icmp eq <8 x i64> %[[InnerPhiNext]], splat (i64 8) ; AVX: %[[InnerCond:.*]] = extractelement <8 x i1> %[[VecCond]], i32 0 ; AVX: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; AVX: [[ForInc]]: -; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], +; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], splat (i64 8) ; AVX: %[[IndNext]] = add nuw i64 %[[Ind]], 8 ; AVX: br i1 true, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll index 5da01a24631bce..bb7fe4d4f1e569 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll @@ -21,8 +21,8 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE15:.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[PRED_SDIV_CONTINUE15]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_SDIV_CONTINUE15]] ] -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] ; CHECK: [[PRED_SDIV_IF]]: @@ -76,8 +76,8 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y ; CHECK: [[PRED_SDIV_CONTINUE15]]: ; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP17]], %[[PRED_SDIV_CONTINUE13]] ], [ [[TMP20]], %[[PRED_SDIV_IF14]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i16> zeroinitializer, <4 x i16> [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], -; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], +; CHECK-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true) +; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT17]], splat (i1 true) ; CHECK-NEXT: [[TMP24]] = or <4 x i1> [[VEC_PHI]], [[TMP22]] ; CHECK-NEXT: [[TMP25]] = or <4 x i1> [[VEC_PHI1]], [[TMP23]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll index fad9a87e5a01d7..ee8374f952c7a3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -26,12 +26,12 @@ define void @foo(ptr %ptr, ptr %ptr.2) { ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP7]], align 8, !alias.scope !3 +; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP7]], align 8, !alias.scope [[META3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -53,7 +53,7 @@ define void @foo(ptr %ptr, ptr %ptr.2) { ; CHECK-NEXT: [[TMP12]] = add nuw nsw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], 80 ; CHECK-NEXT: [[CAN_IV_NEXT]] = add nuw nsw i64 [[CAN_IV]], 1 -; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll index c05c94aa881218..d907c76efd255b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll @@ -29,7 +29,7 @@ define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> -; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> , <4 x ptr> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -87,7 +87,7 @@ define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> -; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> , <4 x ptr> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index c139afae9f2309..c91ead00a950d0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -10,7 +10,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -33,7 +33,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label %vector.body ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll index fa69fefba515f3..8321cbcb5a1d80 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll @@ -32,27 +32,27 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], splat (i64 4) ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 0 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP18]], i32 8, <4 x i1> ), !tbaa [[TBAA10:![0-9]+]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP19]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP20]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP21]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP18]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP19]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP20]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> [[TMP21]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 1 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP22]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP23]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP24]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP25]], i32 8, <4 x i1> ), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP22]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP23]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP24]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP25]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll index cc820f2a8f2a05..a5eb42ae184a7e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll @@ -15,7 +15,7 @@ define void @test_pr55096(i64 %c, ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DOTCAST]], 2008 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 6229, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -37,9 +37,9 @@ define void @test_pr55096(i64 %c, ptr %p) { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]] ; CHECK: pred.store.continue3: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 340 -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index b88f413ff1b81c..df9ba2194139b8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -75,10 +75,10 @@ define void @test(ptr %p) { ; VEC-NEXT: store i64 0, ptr [[TMP24]], align 8 ; VEC-NEXT: store i64 0, ptr [[TMP25]], align 8 ; VEC-NEXT: store i64 0, ptr [[TMP26]], align 8 -; VEC-NEXT: [[TMP27:%.*]] = add <4 x i16> [[VEC_IND]], +; VEC-NEXT: [[TMP27:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) ; VEC-NEXT: [[TMP28]] = zext <4 x i16> [[TMP27]] to <4 x i64> ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; VEC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; VEC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VEC: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 3f38abc75a5837..73bdae71654f12 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -25,8 +25,8 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], splat (i64 8) +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 1 @@ -34,13 +34,13 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]]) +; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]], !prof [[PROF5:![0-9]+]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 87, [[MIDDLE_BLOCK]] ], [ 99, [[BB5:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] @@ -48,7 +48,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 ; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF6:![0-9]+]] ; CHECK: bb18: ; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]] @@ -57,7 +57,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90 -; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: bb6: ; CHECK-NEXT: ret void ; @@ -101,7 +101,8 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" } ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} ; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1} -; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]} +; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 3} +; CHECK: [[PROF6]] = !{!"branch_weights", i32 1, i32 1} +; CHECK: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll index dc9c3e27fe08f2..baacad482f9626 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll @@ -27,10 +27,10 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) { ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] ; COST-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1 -; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], +; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) ; COST-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP7]], [[TMP8]] -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]]) ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; COST-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; COST-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -83,14 +83,14 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]] ; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]] -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -166,12 +166,12 @@ define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] ; COST-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1 -; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], +; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; COST-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; COST-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP10]], [[TMP7]] -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]]) ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; COST-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; COST-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -225,18 +225,18 @@ define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], -; FORCED-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) +; FORCED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; FORCED-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) ; FORCED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP9]] ; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP10]] -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP17]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP17]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -350,26 +350,26 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer ; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer ; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]] ; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]] ; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]] ; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP14]] -; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], -; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP13]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]]) +; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true) +; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP7]], i32 1, <4 x i1> [[TMP13]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP19]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP19]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -468,17 +468,17 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] ; COST-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1 -; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], +; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) ; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer -; COST-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], -; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], +; COST-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; COST-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP13]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP6]], i32 1, <4 x i1> [[TMP13]]) ; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]]) -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP7]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP7]]) ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; COST-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; COST-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -544,28 +544,28 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer ; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer -; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], -; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], -; FORCED-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], -; FORCED-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], +; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) +; FORCED-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; FORCED-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) ; FORCED-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP20]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP21]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP7]], i32 1, <4 x i1> [[TMP21]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]]) ; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -706,16 +706,16 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) ; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer ; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer -; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) +; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 14) +; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 14) +; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 15) +; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 15) ; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP23]], [[TMP25]] ; FORCED-NEXT: [[TMP28:%.*]] = or <4 x i1> [[TMP24]], [[TMP26]] ; FORCED-NEXT: [[TMP21:%.*]] = or <4 x i1> [[TMP13]], [[TMP17]] @@ -724,14 +724,14 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP22]], [[TMP16]] ; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP27]], [[TMP35]] ; FORCED-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP28]], [[TMP36]] -; FORCED-NEXT: [[TMP39:%.*]] = xor <4 x i1> [[TMP37]], -; FORCED-NEXT: [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], +; FORCED-NEXT: [[TMP39:%.*]] = xor <4 x i1> [[TMP37]], splat (i1 true) +; FORCED-NEXT: [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], splat (i1 true) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP35]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP36]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP27]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP39]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP27]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP39]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -857,20 +857,20 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP11]] ; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP12]] -; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP17]], -; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP18]], +; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true) +; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP20]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP20]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -965,16 +965,16 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, ; COST-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1 ; COST-NEXT: [[TMP7:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], +; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) ; COST-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; COST-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer ; COST-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]] -; COST-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP12]], +; COST-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) ; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]]) -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]]) -; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]]) +; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]]) ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -1041,26 +1041,26 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 ; FORCED-NEXT: [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; FORCED-NEXT: [[TMP10:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]] -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP15]], [[TMP25]] ; FORCED-NEXT: [[TMP20:%.*]] = or <4 x i1> [[TMP16]], [[TMP26]] -; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP27]], -; FORCED-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP20]], +; FORCED-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP27]], splat (i1 true) +; FORCED-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP20]], splat (i1 true) ; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP21]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP22]], <4 x i1> zeroinitializer ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP25]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP26]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -1194,18 +1194,18 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12) +; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13) +; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13) ; FORCED-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP25]] ; FORCED-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP10]], [[TMP26]] -; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP13]], -; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP14]], +; FORCED-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true) +; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true) ; FORCED-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; FORCED-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]] -; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], -; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], +; FORCED-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], splat (i1 true) +; FORCED-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], splat (i1 true) ; FORCED-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP28:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP25]] @@ -1214,12 +1214,12 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP30]]) ; FORCED-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer ; FORCED-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP32]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP32]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]]) ; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP32]], [[TMP15]] ; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP33]], [[TMP16]] -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP36]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP36]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -1348,24 +1348,24 @@ define void @large_number_of_cases(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4 ; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1 ; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1 -; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], -; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], -; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], +; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 1) +; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 1) +; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3) +; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 3) +; FORCED-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 11) +; FORCED-NEXT: [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 11) +; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 99) +; FORCED-NEXT: [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 99) +; FORCED-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 213) +; FORCED-NEXT: [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 213) +; FORCED-NEXT: [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 238) +; FORCED-NEXT: [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 238) +; FORCED-NEXT: [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 513) +; FORCED-NEXT: [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 513) +; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 791) +; FORCED-NEXT: [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 791) +; FORCED-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 899) +; FORCED-NEXT: [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 899) ; FORCED-NEXT: [[TMP27:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]] ; FORCED-NEXT: [[TMP28:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]] ; FORCED-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP13]] @@ -1382,8 +1382,8 @@ define void @large_number_of_cases(ptr %start, ptr %end) { ; FORCED-NEXT: [[TMP40:%.*]] = or <4 x i1> [[TMP38]], [[TMP24]] ; FORCED-NEXT: [[TMP57:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]] ; FORCED-NEXT: [[TMP58:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]] -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP7]], i32 1, <4 x i1> [[TMP57]]) -; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> , ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP57]]) +; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]]) ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FORCED-NEXT: [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FORCED-NEXT: br i1 [[TMP59]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll index 9cd990b9878ce6..95c74d19dd2dbc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -124,8 +124,8 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP2]], i32 0 @@ -191,8 +191,8 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP2]], i32 0 @@ -265,8 +265,8 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 @@ -348,8 +348,8 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll index 14e29e0d0518a1..8d56c3386a3b1b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll @@ -18,8 +18,8 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]] ; CHECK: [[PRED_UREM_IF]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index ce460f4fe35425..414460872ba13b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -15,23 +15,253 @@ target triple = "x86_64-apple-macosx10.11.0" ; Function Attrs: norecurse nounwind ssp uwtable define void @_Z3fn1v() #0 { ; CHECK-LABEL: @_Z3fn1v( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @c, align 4 +; CHECK-NEXT: [[CMP34:%.*]] = icmp sgt i32 [[TMP0]], 8 +; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @a, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @b, align 8 +; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[TMP2]], 4063299859190 +; CHECK-NEXT: [[TOBOOL6:%.*]] = icmp eq i64 [[MUL]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[ITER_CHECK27:%.*]], label [[ITER_CHECK:%.*]] +; CHECK: iter.check: +; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], -9 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP6]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = add i64 8, [[TMP7]] +; CHECK-NEXT: [[IND_END4:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT4:%.*]], %vector.body ] -; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> , [[VEC_IND]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP12]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> , <16 x ptr> [[TMP13]], i32 16, <16 x i1> ) -; CHECK-NEXT: [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP13]], i32 16, <16 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], splat (i64 1) ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> , <16 x ptr> [[TMP16]], i32 8, <16 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP16]], i32 8, <16 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]], -; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <16 x i64> [[VEC_IND3]], splat (i64 32) +; CHECK-NEXT: [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP63]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT99:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[IND_END12:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END9:%.*]] = add i64 8, [[TMP64]] +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP6]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF6:%.*]] = urem i64 [[TMP6]], 8 +; CHECK-NEXT: [[N_VEC7:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF6]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[N_VEC7]], 2 +; CHECK-NEXT: [[IND_END8:%.*]] = add i64 8, [[TMP17]] +; CHECK-NEXT: [[IND_END11:%.*]] = mul i64 [[N_VEC7]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[DOTSPLAT]], +; CHECK-NEXT: [[DOTSPLATINSERT17:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL5]], i64 0 +; CHECK-NEXT: [[DOTSPLAT18:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT17]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION19:%.*]] = add <8 x i64> [[DOTSPLAT18]], +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT22:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <8 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND20:%.*]] = phi <8 x i64> [ [[INDUCTION19]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND15]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND15]] +; CHECK-NEXT: [[TMP20:%.*]] = add nsw <8 x i64> [[TMP18]], [[VEC_IND20]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP20]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP21]], i32 16, <8 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP22:%.*]] = or disjoint <8 x i64> [[VEC_IND20]], splat (i64 1) +; CHECK-NEXT: [[TMP23:%.*]] = add nsw <8 x i64> [[TMP18]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP23]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP24]], i32 8, <8 x i1> splat (i1 true)) +; CHECK-NEXT: [[INDEX_NEXT22]] = add nuw i64 [[INDEX14]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <8 x i64> [[VEC_IND15]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i64> [[VEC_IND20]], splat (i64 16) +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT22]], [[N_VEC7]] +; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[CMP_N23:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC7]] +; CHECK-NEXT: br i1 [[CMP_N23]], label [[FOR_COND_CLEANUP_LOOPEXIT99]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ 8, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: iter.check27: +; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP3]], -9 +; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP26]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK25:%.*]] = icmp ult i64 [[TMP28]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK25]], label [[VEC_EPILOG_SCALAR_PH46:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK29:%.*]] +; CHECK: vector.main.loop.iter.check29: +; CHECK-NEXT: [[MIN_ITERS_CHECK28:%.*]] = icmp ult i64 [[TMP28]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK28]], label [[VEC_EPILOG_PH47:%.*]], label [[VECTOR_PH30:%.*]] +; CHECK: vector.ph30: +; CHECK-NEXT: [[N_MOD_VF31:%.*]] = urem i64 [[TMP28]], 16 +; CHECK-NEXT: [[N_VEC32:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF31]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[TOBOOL6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[N_VEC32]], 2 +; CHECK-NEXT: [[IND_END41:%.*]] = add i64 8, [[TMP29]] +; CHECK-NEXT: [[IND_END43:%.*]] = mul i64 [[N_VEC32]], 2 +; CHECK-NEXT: br label [[VECTOR_BODY33:%.*]] +; CHECK: vector.body33: +; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH30]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY33]] ] +; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT36:%.*]], [[VECTOR_BODY33]] ] +; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT38:%.*]], [[VECTOR_BODY33]] ] +; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND35]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND35]] +; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]] +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP32]], i64 0 +; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[TMP34]]) +; CHECK-NEXT: [[TMP35:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1) +; CHECK-NEXT: [[TMP36:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP36]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP37]], i32 8, <16 x i1> [[TMP34]]) +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[BROADCAST_SPLAT]]) +; CHECK-NEXT: [[TMP38:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1) +; CHECK-NEXT: [[TMP39:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP39]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP40]], i32 8, <16 x i1> [[BROADCAST_SPLAT]]) +; CHECK-NEXT: [[INDEX_NEXT39]] = add nuw i64 [[INDEX34]], 16 +; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]] +; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK24:%.*]], label [[VECTOR_BODY33]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block24: +; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] +; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK48:%.*]] +; CHECK: vec.epilog.iter.check48: +; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 +; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 +; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] +; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH46]], label [[VEC_EPILOG_PH47]] +; CHECK: vec.epilog.ph47: +; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] +; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] +; CHECK-NEXT: [[N_MOD_VF52:%.*]] = urem i64 [[TMP28]], 8 +; CHECK-NEXT: [[N_VEC53:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF52]] +; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[N_VEC53]], 2 +; CHECK-NEXT: [[IND_END54:%.*]] = add i64 8, [[TMP43]] +; CHECK-NEXT: [[IND_END57:%.*]] = mul i64 [[N_VEC53]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT62:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL42]], i64 0 +; CHECK-NEXT: [[DOTSPLAT63:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT62]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION64:%.*]] = add <8 x i64> [[DOTSPLAT63]], +; CHECK-NEXT: [[DOTSPLATINSERT67:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL44]], i64 0 +; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], +; CHECK-NEXT: [[BROADCAST_SPLATINSERT72:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT73:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT72]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY60:%.*]] +; CHECK: vec.epilog.vector.body60: +; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH47]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY60]] ] +; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY60]] ] +; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY60]] ] +; CHECK-NEXT: [[TMP44:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND65]] +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]] +; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]] +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP46]], i64 0 +; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT73]], splat (i1 true) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[TMP48]]) +; CHECK-NEXT: [[TMP49:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1) +; CHECK-NEXT: [[TMP50:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP49]] +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP50]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP51]], i32 8, <8 x i1> [[TMP48]]) +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[BROADCAST_SPLAT73]]) +; CHECK-NEXT: [[TMP52:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1) +; CHECK-NEXT: [[TMP53:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP53]], i64 0 +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP54]], i32 8, <8 x i1> [[BROADCAST_SPLAT73]]) +; CHECK-NEXT: [[INDEX_NEXT74]] = add nuw i64 [[INDEX61]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) +; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) +; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] +; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK45:%.*]], label [[VEC_EPILOG_VECTOR_BODY60]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: vec.epilog.middle.block45: +; CHECK-NEXT: [[CMP_N75:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] +; CHECK-NEXT: br i1 [[CMP_N75]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH46]] +; CHECK: vec.epilog.scalar.ph46: +; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[ITER_CHECK27]] ] +; CHECK-NEXT: [[BC_RESUME_VAL59:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[ITER_CHECK27]] ] +; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] +; CHECK: for.body.us: +; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US:%.*]] ], [ [[BC_RESUME_VAL56]], [[VEC_EPILOG_SCALAR_PH46]] ] +; CHECK-NEXT: [[INDVARS_IV70:%.*]] = phi i64 [ [[INDVARS_IV_NEXT71:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] ], [ [[BC_RESUME_VAL59]], [[VEC_EPILOG_SCALAR_PH46]] ] +; CHECK-NEXT: [[TMP56:%.*]] = sub nsw i64 8, [[INDVARS_IV78]] +; CHECK-NEXT: [[ADD_PTR_US:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV78]] +; CHECK-NEXT: [[TMP57:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV70]] +; CHECK-NEXT: [[ARRAYDECAY_US_US_US:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR_US]], i64 [[TMP57]], i64 0 +; CHECK-NEXT: br i1 [[TOBOOL6]], label [[FOR_BODY5_US_US_US_PREHEADER:%.*]], label [[FOR_BODY5_US_US48_PREHEADER:%.*]] +; CHECK: for.body5.us.us48.preheader: +; CHECK-NEXT: store i32 8, ptr [[ARRAYDECAY_US_US_US]], align 16 +; CHECK-NEXT: [[INDVARS_IV_NEXT66:%.*]] = or disjoint i64 [[INDVARS_IV70]], 1 +; CHECK-NEXT: [[TMP58:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV_NEXT66]] +; CHECK-NEXT: [[ARRAYDECAY_US_US55_1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR_US]], i64 [[TMP58]], i64 0 +; CHECK-NEXT: store i32 8, ptr [[ARRAYDECAY_US_US55_1]], align 8 +; CHECK-NEXT: br label [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] +; CHECK: for.body5.us.us.us.preheader: +; CHECK-NEXT: store i32 7, ptr [[ARRAYDECAY_US_US_US]], align 16 +; CHECK-NEXT: [[INDVARS_IV_NEXT73:%.*]] = or disjoint i64 [[INDVARS_IV70]], 1 +; CHECK-NEXT: [[TMP59:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV_NEXT73]] +; CHECK-NEXT: [[ARRAYDECAY_US_US_US_1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR_US]], i64 [[TMP59]], i64 0 +; CHECK-NEXT: store i32 7, ptr [[ARRAYDECAY_US_US_US_1]], align 8 +; CHECK-NEXT: br label [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] +; CHECK: for.cond.cleanup4.us-lcssa.us.us: +; CHECK-NEXT: [[INDVARS_IV_NEXT79]] = add nuw nsw i64 [[INDVARS_IV78]], 2 +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT79]], [[TMP3]] +; CHECK-NEXT: [[INDVARS_IV_NEXT71]] = add nuw nsw i64 [[INDVARS_IV70]], 2 +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_BODY_US]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup.loopexit99: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV95:%.*]] = phi i64 [ [[INDVARS_IV_NEXT96:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL10]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[INDVARS_IV87:%.*]] = phi i64 [ [[INDVARS_IV_NEXT88:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL13]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[TMP60:%.*]] = sub nsw i64 8, [[INDVARS_IV95]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV95]] +; CHECK-NEXT: [[TMP61:%.*]] = add nsw i64 [[TMP60]], [[INDVARS_IV87]] +; CHECK-NEXT: [[ARRAYDECAY_US31:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR]], i64 [[TMP61]], i64 0 +; CHECK-NEXT: store i32 8, ptr [[ARRAYDECAY_US31]], align 16 +; CHECK-NEXT: [[INDVARS_IV_NEXT90:%.*]] = or disjoint i64 [[INDVARS_IV87]], 1 +; CHECK-NEXT: [[TMP62:%.*]] = add nsw i64 [[TMP60]], [[INDVARS_IV_NEXT90]] +; CHECK-NEXT: [[ARRAYDECAY_US31_1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR]], i64 [[TMP62]], i64 0 +; CHECK-NEXT: store i32 8, ptr [[ARRAYDECAY_US31_1]], align 8 +; CHECK-NEXT: [[INDVARS_IV_NEXT96]] = add nuw nsw i64 [[INDVARS_IV95]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT96]], [[TMP3]] +; CHECK-NEXT: [[INDVARS_IV_NEXT88]] = add nuw nsw i64 [[INDVARS_IV87]], 2 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT99]], !llvm.loop [[LOOP7:![0-9]+]] ; entry: %0 = load i32, ptr @c, align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index dc474fbf67ce8b..99aaecf9a073c4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -118,7 +118,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -410,7 +410,7 @@ define void @example23b(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw <4 x i32> [[TMP1]], splat (i32 7) ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[NEXT_GEP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 @@ -457,7 +457,7 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IV]], splat (i64 257) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index 4e9b97ef928b13..9c42c3769ec177 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -164,10 +164,10 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP137:%.*]] = mul nsw <8 x i32> [[TMP103]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP138:%.*]] = mul nsw <8 x i32> [[TMP119]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP139:%.*]] = mul nsw <8 x i32> [[TMP135]], [[WIDE_LOAD6]] -; CHECK-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], -; CHECK-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], -; CHECK-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], +; CHECK-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], splat (i32 4) +; CHECK-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], splat (i32 4) +; CHECK-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], splat (i32 4) +; CHECK-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], splat (i32 4) ; CHECK-NEXT: [[TMP144]] = add <8 x i32> [[TMP140]], [[TMP136]] ; CHECK-NEXT: [[TMP145]] = add <8 x i32> [[TMP141]], [[TMP137]] ; CHECK-NEXT: [[TMP146]] = add <8 x i32> [[TMP142]], [[TMP138]] @@ -356,10 +356,10 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP137:%.*]] = mul nsw <8 x i32> [[TMP103]], [[WIDE_LOAD4]] ; MAX-BW-NEXT: [[TMP138:%.*]] = mul nsw <8 x i32> [[TMP119]], [[WIDE_LOAD5]] ; MAX-BW-NEXT: [[TMP139:%.*]] = mul nsw <8 x i32> [[TMP135]], [[WIDE_LOAD6]] -; MAX-BW-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], -; MAX-BW-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], -; MAX-BW-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], -; MAX-BW-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], +; MAX-BW-NEXT: [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], splat (i32 4) +; MAX-BW-NEXT: [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], splat (i32 4) +; MAX-BW-NEXT: [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], splat (i32 4) +; MAX-BW-NEXT: [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], splat (i32 4) ; MAX-BW-NEXT: [[TMP144]] = add <8 x i32> [[TMP140]], [[TMP136]] ; MAX-BW-NEXT: [[TMP145]] = add <8 x i32> [[TMP141]], [[TMP137]] ; MAX-BW-NEXT: [[TMP146]] = add <8 x i32> [[TMP142]], [[TMP138]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll index 68c0b8cbc2c4a9..de906598775eb0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -17,7 +17,7 @@ define dso_local void @tail_folding_enabled(ptr noalias nocapture %A, ptr noalia ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 429) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison) @@ -85,7 +85,7 @@ define dso_local void @tail_folding_disabled(ptr noalias nocapture %A, ptr noali ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 429) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison) diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index fa0d529116d109..45594b03353361 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -291,8 +291,8 @@ define void @uniform_copy(ptr %A, ptr %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !alias.scope !12 -; CHECK-NEXT: store i32 [[TMP0]], ptr [[B]], align 4, !alias.scope !15, !noalias !12 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META12:![0-9]+]] +; CHECK-NEXT: store i32 [[TMP0]], ptr [[B]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] @@ -349,9 +349,9 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 @@ -376,10 +376,10 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP11]], align 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i8> poison, i8 [[TMP15]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT11]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = urem <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP17:%.*]] = urem <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP18:%.*]] = urem <4 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP19:%.*]] = urem <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[TMP16:%.*]] = urem <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[TMP17:%.*]] = urem <4 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[TMP18:%.*]] = urem <4 x i64> [[STEP_ADD_2]], splat (i64 8) +; CHECK-NEXT: [[TMP19:%.*]] = urem <4 x i64> [[STEP_ADD_3]], splat (i64 8) ; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i64> [[TMP16]] to <4 x i8> ; CHECK-NEXT: [[TMP21:%.*]] = trunc <4 x i64> [[TMP17]] to <4 x i8> ; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i64> [[TMP18]] to <4 x i8> @@ -388,10 +388,10 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[TMP25:%.*]] = lshr <4 x i8> [[BROADCAST_SPLAT8]], [[TMP21]] ; CHECK-NEXT: [[TMP26:%.*]] = lshr <4 x i8> [[BROADCAST_SPLAT10]], [[TMP22]] ; CHECK-NEXT: [[TMP27:%.*]] = lshr <4 x i8> [[BROADCAST_SPLAT12]], [[TMP23]] -; CHECK-NEXT: [[TMP28:%.*]] = and <4 x i8> [[TMP24]], -; CHECK-NEXT: [[TMP29:%.*]] = and <4 x i8> [[TMP25]], -; CHECK-NEXT: [[TMP30:%.*]] = and <4 x i8> [[TMP26]], -; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i8> [[TMP27]], +; CHECK-NEXT: [[TMP28:%.*]] = and <4 x i8> [[TMP24]], splat (i8 1) +; CHECK-NEXT: [[TMP29:%.*]] = and <4 x i8> [[TMP25]], splat (i8 1) +; CHECK-NEXT: [[TMP30:%.*]] = and <4 x i8> [[TMP26]], splat (i8 1) +; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i8> [[TMP27]], splat (i8 1) ; CHECK-NEXT: [[TMP32:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32> ; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i8> [[TMP29]] to <4 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = zext <4 x i8> [[TMP30]] to <4 x i32> @@ -401,7 +401,7 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[TMP38]] = add <4 x i32> [[VEC_PHI5]], [[TMP34]] ; CHECK-NEXT: [[TMP39]] = add <4 x i32> [[VEC_PHI6]], [[TMP35]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 ; CHECK-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index 094f80e287f0b2..9feeee004025c9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -114,7 +114,7 @@ define void @vectorized1(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 19) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group [[ACC_GRP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll index c6a87fa21fb0cc..3af3ad9ab3cf5b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll @@ -16,7 +16,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 10000) ; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 6 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[B:%.*]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 @@ -91,7 +91,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX97]], align 1, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 10000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll index 5ec7c1d45f8cf1..85d6c801dee690 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll @@ -30,7 +30,7 @@ define void @test(ptr %A) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[STRIDED_VEC]], splat (i32 2) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP13]], i32 0 ; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index 12f95e0a0a7dc8..6f682c2d60f3f8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -14,9 +14,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 @@ -27,14 +27,14 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i64> [[STEP_ADD2]], -; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], -; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD5]], -; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], +; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], splat (i64 128) +; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], splat (i64 128) +; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD5]], splat (i64 128) +; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], splat (i64 128) ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]] @@ -47,7 +47,7 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP14]], ptr [[TMP34]], i32 4, <4 x i1> [[TMP18]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP15]], ptr [[TMP35]], i32 4, <4 x i1> [[TMP19]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -120,10 +120,10 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD1]], -; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], +; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], splat (i64 128) +; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD1]], splat (i64 128) +; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD2]], splat (i64 128) +; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], splat (i64 128) ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP15]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP23]] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[TMP24]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index d25b03943fa217..4174aa4e897bca 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -40,7 +40,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -124,7 +124,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -148,7 +148,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP3]], i32 1, <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.body: @@ -213,7 +213,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -297,7 +297,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -322,7 +322,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP4]], i32 1, <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -402,7 +402,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -486,7 +486,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -521,7 +521,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP6]], i32 1, <8 x i1> [[TMP4]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -606,7 +606,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], splat (i32 3) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -690,7 +690,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -725,7 +725,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP6]], i32 1, <8 x i1> [[TMP4]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -792,7 +792,7 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read ; DISABLED_MASKED_STRIDED: vector.body: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP1]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP0]], i64 1 @@ -828,7 +828,7 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read ; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: store <8 x i8> [[TMP32]], ptr [[TMP33]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -906,7 +906,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -990,7 +990,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]]) ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1088,7 +1088,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -1169,7 +1169,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; DISABLED_MASKED_STRIDED: pred.load.continue14: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if15: @@ -1397,7 +1397,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1425,7 +1425,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP6]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -1493,7 +1493,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -1574,7 +1574,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; DISABLED_MASKED_STRIDED: pred.load.continue14: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if15: @@ -1802,7 +1802,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -1818,7 +1818,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; ENABLED_MASKED_STRIDED: pred.load.if: @@ -1899,7 +1899,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; ENABLED_MASKED_STRIDED: pred.load.continue14: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] ; ENABLED_MASKED_STRIDED: pred.load.if15: @@ -2127,7 +2127,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; ENABLED_MASKED_STRIDED: pred.store.continue60: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -2213,7 +2213,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -2294,7 +2294,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] ; DISABLED_MASKED_STRIDED: pred.load.continue16: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = or disjoint <8 x i32> [[TMP2]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = or disjoint <8 x i32> [[TMP2]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if17: @@ -2522,7 +2522,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE62]] ; DISABLED_MASKED_STRIDED: pred.store.continue62: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -2559,7 +2559,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP8]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -2647,7 +2647,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -2728,7 +2728,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; DISABLED_MASKED_STRIDED: pred.load.continue14: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if15: @@ -2956,7 +2956,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll index b850dc3ecef85d..bd54f87b504896 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll @@ -27,7 +27,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], splat (i64 2) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i64 1 @@ -46,7 +46,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 2 -; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = or disjoint <4 x i64> [[TMP1]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = or disjoint <4 x i64> [[TMP1]], splat (i64 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP15]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP16]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP15]], i64 1 @@ -64,7 +64,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur ; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP27]], ptr [[TMP23]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -141,7 +141,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP1]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison) -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nsw <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nsw <4 x i64> [[VEC_IND]], splat (i64 2) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: @@ -180,7 +180,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED: pred.store.continue6: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP19]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison) -; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], splat (i64 1) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if8: @@ -218,7 +218,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]] ; DISABLED_MASKED_STRIDED: pred.store.continue15: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP37]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end.loopexit: @@ -247,7 +247,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP1]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDEX]], 3 -; ENABLED_MASKED_STRIDED-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[POINTS:%.+]], i64 [[TMP2]] +; ENABLED_MASKED_STRIDED-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[POINTS:%.*]], i64 [[TMP2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP3]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison) ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_MASKED_LOAD]], <4 x i16> [[WIDE_MASKED_LOAD3]], <16 x i32> @@ -314,7 +314,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer -; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], splat (i64 3) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if: @@ -352,7 +352,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; DISABLED_MASKED_STRIDED: pred.store.continue6: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DISABLED_MASKED_STRIDED: for.end: @@ -367,7 +367,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], splat (i64 3) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if: @@ -405,7 +405,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; ENABLED_MASKED_STRIDED: pred.store.continue6: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll index 6ed398bb5bea50..5e4ba0b6d33d95 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll @@ -26,7 +26,7 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" ;CHECK-NEXT: %index = phi i32 ;CHECK-NEXT: %[[VECIND:.+]] = phi <8 x i32> [ ;CHECK-NEXT: %[[VMASK:.+]] = icmp ugt <8 x i32> %[[VECIND]], %{{broadcast.splat*}} -;CHECK-NEXT: %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], +;CHECK-NEXT: %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], splat (i32 1) ;CHECK-NEXT: %[[M:.+]] = extractelement <8 x i1> %[[VMASK]], i32 0 ;CHECK-NEXT: br i1 %[[M]], label %pred.store.if, label %pred.store.continue ;CHECK-NOT: %{{.+}} = load <16 x i8>, ptr %{{.*}}, align 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index eee1b6f35d1b70..126bbf9afc34e8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -227,7 +227,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = mul <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 777) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; CHECK: pred.udiv.if: @@ -253,7 +253,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: @@ -301,7 +301,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE14:%.*]] ] ; SINK-GATHER-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE14]] ] ; SINK-GATHER-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_UDIV_CONTINUE14]] ] -; SINK-GATHER-NEXT: [[TMP0:%.*]] = mul <8 x i64> [[VEC_IND]], +; SINK-GATHER-NEXT: [[TMP0:%.*]] = mul <8 x i64> [[VEC_IND]], splat (i64 777) ; SINK-GATHER-NEXT: [[TMP1:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 0 ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; SINK-GATHER: pred.udiv.if: @@ -393,7 +393,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] ; SINK-GATHER-NEXT: [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; SINK-GATHER-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-GATHER-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-GATHER: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll index 0ca3e05d77c044..c81f48ff62afc3 100644 --- a/llvm/test/Transforms/LoopVectorize/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/assume.ll @@ -5,8 +5,8 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK: vector.body: ; CHECK: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr {{.*}}, align 4 ; CHECK: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr {{.*}}, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+02) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+02) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll index 8b06d9f61e5da1..4c95584ff253a4 100644 --- a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll +++ b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll @@ -115,7 +115,7 @@ define i64 @invar_cond(i1 %c) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> splat (i64 1) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -174,8 +174,8 @@ define i64 @invar_cond_incoming_ops_reordered(i1 %c) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> , <4 x i64> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> splat (i64 1), <4 x i64> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll index f2111081ffca9c..8397b1ed88dcdd 100644 --- a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll +++ b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll @@ -17,9 +17,9 @@ define i32 @foo(ptr nocapture %A) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shl nsw <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[TMP0:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 8) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1 @@ -33,7 +33,7 @@ define i32 @foo(ptr nocapture %A) { ; CHECK-NEXT: store i32 4, ptr [[TMP7]], align 4 ; CHECK-NEXT: store i32 4, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -41,7 +41,7 @@ define i32 @foo(ptr nocapture %A) { ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index 782efb7acc644f..deca8ea161cc4c 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -45,7 +45,7 @@ define void @redundant_iv_cast(ptr %dst) { ; VF4: vector.body: ; VF4: [[VEC_IND:%.+]] = phi <4 x i16> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.+]], %vector.body ] ; VF4: store <4 x i16> [[VEC_IND]] -; VF4: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; VF4: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; ; IC2-LABEL: @redundant_iv_cast ; IC2: vector.body: @@ -120,7 +120,7 @@ define void @cast_induction_tail_folding(ptr %A) { ; VF4-LABEL: @cast_induction_tail_folding( ; VF4: [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ] ; VF4-NEXT: [[VEC_IND:%.+]] = phi <4 x i32> [ , %vector.ph ] -; VF4-NEXT: = icmp ule <4 x i32> [[VEC_IND]], +; VF4-NEXT: = icmp ule <4 x i32> [[VEC_IND]], splat (i32 2) ; VF4-NEXT: = sext <4 x i32> [[VEC_IND]] to <4 x i64> ; IC2-LABEL: @cast_induction_tail_folding( diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll index aea383226c71e6..c745b4f74786c9 100644 --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -49,7 +49,7 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDUCTION_IV_LCSSA1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> , [[TMP4]] +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4 ; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll index 3bf2591391121e..66aceab9fb27c8 100644 --- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll @@ -17,21 +17,21 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK: for.cond5.preheader1: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> ), !dbg [[DBG22:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> , <4 x ptr> [[TMP1]], i32 4, <4 x i1> ), !dbg [[DBG22]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 2, !dbg [[DBG22]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> , <4 x ptr> [[TMP2]], i32 4, <4 x i1> ), !dbg [[DBG22]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 2), <4 x ptr> [[TMP2]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 3, !dbg [[DBG22]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> , <4 x ptr> [[TMP3]], i32 4, <4 x i1> ), !dbg [[DBG22]] -; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], , !dbg [[DBG24:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], , !dbg [[DBG25:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 3), <4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]] +; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1), !dbg [[DBG24:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], splat (i64 5), !dbg [[DBG25:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0, !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP32]], label [[FOR_COND5_PREHEADER1]], !dbg [[DBG26]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]], !dbg [[DBG21]] ; CHECK: scalar.ph: @@ -45,19 +45,19 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: [[L_022:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INC10:%.*]], [[FOR_COND5_PREHEADER]] ] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[H]], i64 [[L_022]] ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !dbg [[DBG22]] -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP10]], i64 1, !dbg [[DBG33:![0-9]+]] +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP10]], i64 1, !dbg [[DBG31:![0-9]+]] ; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX_1]], align 4, !dbg [[DBG22]] -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP10]], i64 2, !dbg [[DBG33]] +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP10]], i64 2, !dbg [[DBG31]] ; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX_2]], align 4, !dbg [[DBG22]] -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP10]], i64 3, !dbg [[DBG33]] +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP10]], i64 3, !dbg [[DBG31]] ; CHECK-NEXT: store i32 3, ptr [[ARRAYIDX_3]], align 4, !dbg [[DBG22]] ; CHECK-NEXT: [[INC10]] = add nuw nsw i64 [[L_022]], 1, !dbg [[DBG24]] ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC10]], 5, !dbg [[DBG25]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_COND5_PREHEADER]], !dbg [[DBG26]] ; CHECK: for.cond.cleanup3: -; CHECK-NEXT: [[INC13]] = add nuw nsw i64 [[I_023]], 1, !dbg [[DBG27:![0-9]+]] +; CHECK-NEXT: [[INC13]] = add nuw nsw i64 [[I_023]], 1, !dbg [[DBG32:![0-9]+]] ; CHECK-NEXT: #dbg_value(i64 [[INC13]], [[META11]], !DIExpression(), [[META20]]) -; CHECK-NEXT: [[EXITCOND24_NOT:%.*]] = icmp eq i64 [[INC13]], 23, !dbg [[DBG28:![0-9]+]] +; CHECK-NEXT: [[EXITCOND24_NOT:%.*]] = icmp eq i64 [[INC13]], 23, !dbg [[DBG33:![0-9]+]] ; CHECK-NEXT: br i1 [[EXITCOND24_NOT]], label [[EXIT]], label [[FOR_COND1_PREHEADER]], !dbg [[DBG21]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void, !dbg [[DBG35:![0-9]+]] @@ -160,13 +160,13 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; CHECK: [[DBG24]] = !DILocation(line: 11, column: 32, scope: [[META19]]) ; CHECK: [[DBG25]] = !DILocation(line: 11, column: 26, scope: [[META19]]) ; CHECK: [[DBG26]] = !DILocation(line: 11, column: 5, scope: [[META15]]) -; CHECK: [[LOOP29]] = distinct !{[[LOOP29]], [[DBG21]], [[META30:![0-9]+]], [[META31:![0-9]+]], [[META32:![0-9]+]]} -; CHECK: [[META30]] = !DILocation(line: 13, column: 13, scope: [[META12]]) -; CHECK: [[META31]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META32]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[DBG33]] = !DILocation(line: 13, column: 2, scope: [[META23]]) -; CHECK: [[DBG27]] = !DILocation(line: 10, column: 30, scope: [[META16]]) -; CHECK: [[DBG28]] = !DILocation(line: 10, column: 24, scope: [[META16]]) -; CHECK: [[LOOP34]] = distinct !{[[LOOP34]], [[DBG21]], [[META30]], [[META31]]} +; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[DBG21]], [[META28:![0-9]+]], [[META29:![0-9]+]], [[META30:![0-9]+]]} +; CHECK: [[META28]] = !DILocation(line: 13, column: 13, scope: [[META12]]) +; CHECK: [[META29]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META30]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[DBG31]] = !DILocation(line: 13, column: 2, scope: [[META23]]) +; CHECK: [[DBG32]] = !DILocation(line: 10, column: 30, scope: [[META16]]) +; CHECK: [[DBG33]] = !DILocation(line: 10, column: 24, scope: [[META16]]) +; CHECK: [[LOOP34]] = distinct !{[[LOOP34]], [[DBG21]], [[META28]], [[META29]]} ; CHECK: [[DBG35]] = !DILocation(line: 14, column: 1, scope: [[DBG4]]) ;. diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index 8b5dd4211d8505..c2edb54f6f3b57 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -46,16 +46,16 @@ define i64 @dead_instructions_01(ptr %a, i64 %n) { ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP2:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP4:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2]] = add i64 [[TMP1]], [[R]] +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[TMP4]] = add i64 [[TMP2]], [[R]] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ [[TMP4]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[TMP5]] ; entry: br label %for.body @@ -155,8 +155,8 @@ define void @dead_load_and_vector_pointer(ptr %a, ptr %b) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8, !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP5]], align 8, !alias.scope [[META6]], !noalias [[META9]] -; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP4]], align 4, !alias.scope [[META6]], !noalias [[META9]] ; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP5]], align 4, !alias.scope [[META6]], !noalias [[META9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll index cc305781c62417..e28dd77eadd529 100644 --- a/llvm/test/Transforms/LoopVectorize/debugloc.ll +++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll @@ -61,8 +61,8 @@ define i32 @test_debug_loc_on_branch_in_loop(ptr noalias %src, ptr noalias %dst) ; CHECK-LABEL: define i32 @test_debug_loc_on_branch_in_loop( ; CHECK-LABEL: vector.body: ; CHECK: [[LOAD:%.+]] = load <2 x i32>, ptr {{.+}}, align 4 -; CHECK-NEXT: [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], -; CHECK-NEXT: [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], , !dbg [[LOC3:!.+]] +; CHECK-NEXT: [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], splat (i32 10) +; CHECK-NEXT: [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], splat (i1 true), !dbg [[LOC3:!.+]] ; CHECK-NEXT: [[EXT:%.+]] = extractelement <2 x i1> [[XOR]], i32 0, !dbg [[LOC3]] ; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue ; CHECK-NOT: !dbg @@ -101,8 +101,8 @@ define i32 @test_different_debug_loc_on_replicate_recipe(ptr noalias %src, ptr n ; CHECK-LABEL: define i32 @test_different_debug_loc_on_replicate_recipe( ; CHECK-LABEL: vector.body: ; CHECK: [[LOAD:%.+]] = load <2 x i32>, ptr {{.+}}, align 4 -; CHECK-NEXT: [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], -; CHECK-NEXT: [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], , !dbg [[LOC4:!.+]] +; CHECK-NEXT: [[CMP:%.+]] = icmp eq <2 x i32> [[LOAD]], splat (i32 10) +; CHECK-NEXT: [[XOR:%.+]] = xor <2 x i1> [[CMP:%.+]], splat (i1 true), !dbg [[LOC4:!.+]] ; CHECK-NEXT: [[EXT:%.+]] = extractelement <2 x i1> [[XOR]], i32 0, !dbg [[LOC4]] ; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue ; CHECK-NOT: !dbg @@ -146,7 +146,7 @@ define void @test_misc(ptr nocapture %a, ptr noalias %b, i64 %size) !dbg !35 { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr %b, i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> zeroinitializer, !dbg [[LOC6:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0, !dbg [[LOC7:![0-9]+]] ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[TMP6]], align 4, !dbg [[LOC7]] diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll index 274174cceeb1c2..1e37b7be662dfd 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -19,9 +19,9 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4 -; CHECK-NEXT: store <2 x i32> , ptr [[TMP6]], align 1 -; CHECK-NEXT: store <2 x i32> , ptr [[TMP7]], align 1 -; CHECK-NEXT: store <2 x i32> , ptr [[TMP8]], align 1 +; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP6]], align 1 +; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP7]], align 1 +; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 6 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1800 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll index 76ca2507b914cf..53686ee76cbbdb 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -21,7 +21,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize { ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP2]], align 1 +; CHECK-NEXT: store <4 x i32> splat (i32 13), ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -37,7 +37,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize { ; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], [[ALIGNEDTC]] -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -86,7 +86,7 @@ define dso_local void @assumeAlignedTC(ptr noalias nocapture %A, i32 %p, i32 %q) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP2]], align 1 +; CHECK-NEXT: store <4 x i32> splat (i32 13), ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -198,7 +198,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 5f2e91b8d1f32d..8afde74cf277dc 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -223,10 +223,10 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3]] = or <4 x i1> [[VEC_PHI]], [[TMP2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -261,10 +261,10 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND11:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[VEC_IND11]], [[BROADCAST_SPLAT14]] -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; CHECK-NEXT: [[TMP10]] = or <4 x i1> [[VEC_PHI10]], [[TMP9]] ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX9]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], +; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], splat (i32 4) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -359,7 +359,7 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i64> [[TMP19]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP20]], +; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP20]], splat (i1 true) ; CHECK-NEXT: [[RDX_SELECT_CMP]] = or <4 x i1> [[VEC_PHI]], [[TMP21]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -414,7 +414,7 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i64> [[TMP39]], i64 [[TMP36]], i32 2 ; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i64> [[TMP40]], i64 [[TMP37]], i32 3 ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq <4 x i64> [[TMP41]], [[BROADCAST_SPLAT19]] -; CHECK-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP42]], +; CHECK-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP42]], splat (i1 true) ; CHECK-NEXT: [[TMP43]] = or <4 x i1> [[VEC_PHI12]], [[TMP46]] ; CHECK-NEXT: [[INDEX_NEXT20]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT20]], [[N_VEC8]] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index f78654176e23eb..3c1dd1bd8b6d13 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -201,7 +201,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 65535) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2 @@ -228,7 +228,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ [[TMP12]], [[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i32> [[VEC_PHI2]], +; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i32> [[VEC_PHI2]], splat (i32 65535) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i32 [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i16>, ptr [[TMP16]], align 2 @@ -327,7 +327,7 @@ define float @multiple_fp_rdx(ptr %A, i64 %N) { ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> , float [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> splat (float 1.000000e+00), float [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> zeroinitializer, float [[BC_MERGE_RDX3]], i32 0 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll index 756b0ab9612b8b..7a92d1a1c9ea5f 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll @@ -30,7 +30,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 2) ; CHECK-NEXT: store <4 x i8> [[TMP9]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -54,7 +54,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) { ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i8> [[WIDE_LOAD6]], +; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i8> [[WIDE_LOAD6]], splat (i8 2) ; CHECK-NEXT: store <4 x i8> [[TMP15]], ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]] diff --git a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll index 47636b2c66d296..f648de1358998e 100644 --- a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll +++ b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll @@ -13,7 +13,7 @@ define void @inv_store_last_lane(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -74,7 +74,7 @@ define float @ret_last_lane(ptr noalias nocapture %a, ptr noalias nocapture read ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index b0ece3980cdf24..7aedb218e13520 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -170,7 +170,7 @@ define void @test_first_order_recurrences_incoming_cycle_preheader(ptr %ptr) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP4]], splat (i16 10) ; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP2]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 @@ -314,7 +314,7 @@ define i16 @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr) ; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR2]], <4 x i16> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[TMP4]], splat (i16 10) ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP7]], [[TMP6]] ; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP2]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -365,7 +365,7 @@ define i16 @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr % ; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR2]], <4 x i16> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP6]], splat (i16 10) ; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP2]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 @@ -413,7 +413,7 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr) { ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> , [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x double> [[TMP6]], [[TMP4]] ; CHECK-NEXT: store <4 x double> [[TMP7]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -480,11 +480,11 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], splat (i64 10) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: @@ -520,11 +520,11 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], splat (i64 10) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index eda92aae095ddf..0f206bfb92fa01 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -445,9 +445,9 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP9]], align 4 @@ -535,10 +535,10 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr n ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00) +; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[TMP4]], +; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP10]], align 4 @@ -696,8 +696,8 @@ define i16 @multiple_exit(ptr %p, i32 %n) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -772,8 +772,8 @@ define i16 @multiple_exit2(ptr %p, i32 %n) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -841,8 +841,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213) +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> splat (i32 22) ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -852,8 +852,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] @@ -922,9 +922,9 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], splat (i32 2) +; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], splat (i32 99) +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213) ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]] ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -1045,7 +1045,7 @@ define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> , [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: store <4 x double> [[TMP6]], ptr [[TMP2]], align 8 @@ -1114,7 +1114,7 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) { ; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], [[TMP3]] ; CHECK-NEXT: store <4 x double> [[TMP6]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -1125,7 +1125,7 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll index 20acc105353236..c37d2c999d97aa 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -230,11 +230,11 @@ define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias % ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4]] = xor <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], splat (i32 10) +; CHECK-NEXT: [[TMP4]] = xor <4 x i32> splat (i32 12), [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 255) ; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 @@ -405,11 +405,11 @@ define void @hoist_previous_value_and_operand(ptr %dst, i64 %mask) { ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4]] = trunc <4 x i64> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6]] = or <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP6]] = or <4 x i32> [[TMP5]], splat (i32 3) ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP6]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 336 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index a90594085d3cfe..6c94f5bfc836a7 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -901,9 +901,9 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -996,7 +996,7 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: @@ -1240,7 +1240,7 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ , [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1297,7 +1297,7 @@ define i64 @constant_folded_previous_value() { ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ , [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1351,10 +1351,10 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP0]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; UNROLL-NO-IC-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -1423,7 +1423,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[TMP0]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SINK-AFTER: middle.block: @@ -1978,8 +1978,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4 -; UNROLL-NO-IC-NEXT: store <4 x i32> , ptr [[TMP17]], align 4 -; UNROLL-NO-IC-NEXT: store <4 x i32> , ptr [[TMP18]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP17]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP18]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP9]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = load i16, ptr [[TMP10]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP11]], align 2 @@ -2120,7 +2120,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1 ; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1 ; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; SINK-AFTER-NEXT: store <4 x i32> , ptr [[TMP9]], align 4 +; SINK-AFTER-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP9]], align 4 ; SINK-AFTER-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP5]], align 2 ; SINK-AFTER-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP6]], align 2 ; SINK-AFTER-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP7]], align 2 @@ -2221,8 +2221,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP7]], -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP7]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP11]] @@ -2339,7 +2339,7 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP3]], align 2 ; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], +; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], splat (i32 2) ; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP6]], [[TMP7]] ; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] @@ -2510,26 +2510,26 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) ; UNROLL-NO-IC-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], -; UNROLL-NO-IC-NEXT: [[TMP5]] = add <4 x i16> [[TMP2]], +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], splat (i16 5) +; UNROLL-NO-IC-NEXT: [[TMP5]] = add <4 x i16> [[TMP2]], splat (i16 5) ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP4]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10) +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], splat (i16 10) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i32 4 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP11]], align 2 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP12]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 ; UNROLL-NO-IC-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -2618,16 +2618,16 @@ define void @sink_dead_inst(ptr %a) { ; SINK-AFTER-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] ; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) ; SINK-AFTER-NEXT: [[TMP2]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], +; SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], splat (i16 5) ; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], +; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], splat (i16 10) ; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[TMP0]] ; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP6]], i32 0 ; SINK-AFTER-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP7]], align 2 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 ; SINK-AFTER-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; SINK-AFTER: middle.block: @@ -3001,7 +3001,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_STORE_CONTINUE30]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE30]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE30]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 @@ -3148,7 +3148,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP73:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI2]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -3357,7 +3357,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER: pred.store.continue13: ; SINK-AFTER-NEXT: [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] ; SINK-AFTER: middle.block: @@ -3421,10 +3421,10 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]] @@ -3433,7 +3433,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -3521,14 +3521,14 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) ; SINK-AFTER-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] ; SINK-AFTER-NEXT: [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]] ; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; SINK-AFTER-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; SINK-AFTER: middle.block: @@ -3592,11 +3592,11 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i16> [[STEP_ADD]], -; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) +; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -3659,10 +3659,10 @@ define void @unused_recurrence(ptr %a) { ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], -; SINK-AFTER-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], +; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) +; SINK-AFTER-NEXT: [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5) ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028 ; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; SINK-AFTER: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 21aa9dc97187d7..89ac22b054e24d 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -539,7 +539,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VEC4_INTERL1: middle.block: @@ -584,13 +584,13 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 ; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4 ; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL2-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VEC4_INTERL2: middle.block: @@ -688,7 +688,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: @@ -783,14 +783,14 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 ; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] -; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01) ; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[TMP8]], [[TMP7]] ; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[TMP9]], ptr [[TMP10]], align 4 ; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[TMP8]], ptr [[TMP11]], align 4 ; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.000000e+00) ; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]] ; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -843,7 +843,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[BROADCAST]], +; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[BROADCAST]], splat (float 4.000000e+00) ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 @@ -862,8 +862,8 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD11]], ptr [[TMP7]], align 4 ; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST]] ; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[STEP_ADD11]], [[BROADCAST]] -; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND]], -; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01) +; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.500000e+00) ; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[TMP10]], [[TMP8]] ; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP9]] ; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] @@ -875,7 +875,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: store <4 x float> [[TMP10]], ptr [[TMP16]], align 4 ; VEC4_INTERL2-NEXT: store <4 x float> [[TMP11]], ptr [[TMP17]], align 4 ; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -4.000000e+00) ; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT13]] = fadd fast <4 x float> [[STEP_ADD11]], [[DOTSPLAT5]] ; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -1023,14 +1023,14 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01) ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP10]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP8]], ptr [[TMP11]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00) ; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -1121,7 +1121,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VEC4_INTERL1: middle.block: @@ -1163,13 +1163,13 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 ; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4 ; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], +; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00) ; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC4_INTERL2-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VEC4_INTERL2: middle.block: @@ -1264,7 +1264,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VEC2_INTERL1_PRED_STORE: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll index 4b771bf57f977a..c71ff2b9fa4c47 100644 --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; ; CHECK-NOT: Found uniform instruction: %cond = icmp slt i64 %i.next, %n ; CHECK: vector.body -; CHECK: %[[I:.+]] = add nuw nsw <4 x i64> %vec.ind, +; CHECK: %[[I:.+]] = add nuw nsw <4 x i64> %vec.ind, splat (i64 1) ; CHECK: icmp slt <4 x i64> %[[I]], %broadcast.splat ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body ; diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll index 8b0c99b353c8b7..492eb091175e22 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -33,11 +33,11 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META3]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 20) +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> splat (i32 3) +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> splat (i32 9) ; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -140,16 +140,16 @@ define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 19) +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4) +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5) +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 6), <4 x i32> splat (i32 11) ; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 3), <4 x i32> splat (i32 9) ; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]] -; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 7), <4 x i32> splat (i32 18) ; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI4]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META9]], !noalias [[META12]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI5]], ptr [[TMP6]], align 4, !alias.scope [[META12]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index ecb57c539a40ef..b8597b85f79bea 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -46,18 +46,18 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud, ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASR]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4, !alias.scope !5, !noalias !8 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope !12, !noalias !13 +; CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15 +; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope !15 -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[WIDE_LOAD23]], -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD24]], -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i32> [[WIDE_LOAD25]], -; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META15]] +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], splat (i32 23) +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[WIDE_LOAD23]], splat (i32 24) +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD24]], splat (i32 25) +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i32> [[WIDE_LOAD25]], splat (i32 26) +; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 100) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; CHECK: pred.urem.if: @@ -108,19 +108,19 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud, ; CHECK-NEXT: [[TMP53:%.*]] = phi <2 x i32> [ [[TMP32]], [[PRED_UREM_CONTINUE]] ], [ [[TMP43]], [[PRED_UREM_IF26]] ] ; CHECK-NEXT: [[TMP54:%.*]] = phi <2 x i32> [ [[TMP33]], [[PRED_UREM_CONTINUE]] ], [ [[TMP47]], [[PRED_UREM_IF26]] ] ; CHECK-NEXT: [[TMP55:%.*]] = phi <2 x i32> [ [[TMP34]], [[PRED_UREM_CONTINUE]] ], [ [[TMP51]], [[PRED_UREM_IF26]] ] -; CHECK-NEXT: [[TMP56:%.*]] = xor <2 x i1> [[TMP13]], +; CHECK-NEXT: [[TMP56:%.*]] = xor <2 x i1> [[TMP13]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP9]], <2 x i32> [[TMP52]] ; CHECK-NEXT: [[PREDPHI28:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP10]], <2 x i32> [[TMP53]] ; CHECK-NEXT: [[PREDPHI29:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP11]], <2 x i32> [[TMP54]] ; CHECK-NEXT: [[PREDPHI30:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP12]], <2 x i32> [[TMP55]] ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP57]], align 4, !alias.scope !5, !noalias !8 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP57]], align 4, !alias.scope [[META5]], !noalias [[META8]] ; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI28]], ptr [[TMP58]], align 4, !alias.scope !12, !noalias !13 +; CHECK-NEXT: store <2 x i32> [[PREDPHI28]], ptr [[TMP58]], align 4, !alias.scope [[META12]], !noalias [[META13]] ; CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI29]], ptr [[TMP59]], align 4, !alias.scope !14, !noalias !15 +; CHECK-NEXT: store <2 x i32> [[PREDPHI29]], ptr [[TMP59]], align 4, !alias.scope [[META14]], !noalias [[META15]] ; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI30]], ptr [[TMP60]], align 4, !alias.scope !15 +; CHECK-NEXT: store <2 x i32> [[PREDPHI30]], ptr [[TMP60]], align 4, !alias.scope [[META15]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP61:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP61]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -207,14 +207,14 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud, ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ASR]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !5, !noalias !8 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !5, !noalias !8 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !12, !noalias !13 -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !12, !noalias !13 -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !14, !noalias !15 -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15 -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope !15 -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope !15 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META5]], !noalias [[META8]] +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope [[META12]], !noalias [[META13]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META14]], !noalias [[META15]] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META15]] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META15]] ; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP10]], 23 ; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP11]], 23 ; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP12]], 24 @@ -259,14 +259,14 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud, ; UNROLL-NO-VF-NEXT: [[PREDPHI29:%.*]] = select i1 [[TMP45]], i32 [[TMP23]], i32 [[TMP42]] ; UNROLL-NO-VF-NEXT: [[PREDPHI30:%.*]] = select i1 [[TMP44]], i32 [[TMP24]], i32 [[TMP35]] ; UNROLL-NO-VF-NEXT: [[PREDPHI31:%.*]] = select i1 [[TMP45]], i32 [[TMP25]], i32 [[TMP43]] -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !5, !noalias !8 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI25]], ptr [[TMP3]], align 4, !alias.scope !5, !noalias !8 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI26]], ptr [[TMP4]], align 4, !alias.scope !12, !noalias !13 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI27]], ptr [[TMP5]], align 4, !alias.scope !12, !noalias !13 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI28]], ptr [[TMP6]], align 4, !alias.scope !14, !noalias !15 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI29]], ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI30]], ptr [[TMP8]], align 4, !alias.scope !15 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI31]], ptr [[TMP9]], align 4, !alias.scope !15 +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META5]], !noalias [[META8]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI25]], ptr [[TMP3]], align 4, !alias.scope [[META5]], !noalias [[META8]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI26]], ptr [[TMP4]], align 4, !alias.scope [[META12]], !noalias [[META13]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI27]], ptr [[TMP5]], align 4, !alias.scope [[META12]], !noalias [[META13]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI28]], ptr [[TMP6]], align 4, !alias.scope [[META14]], !noalias [[META15]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI29]], ptr [[TMP7]], align 4, !alias.scope [[META14]], !noalias [[META15]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI30]], ptr [[TMP8]], align 4, !alias.scope [[META15]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI31]], ptr [[TMP9]], align 4, !alias.scope [[META15]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; UNROLL-NO-VF-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -371,12 +371,12 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META23:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope !23 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META23]] +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], splat (i32 23) +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 100) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] ; CHECK: pred.sdiv.if: @@ -401,10 +401,10 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE4]] ; CHECK: pred.sdiv.continue4: ; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i32> [ [[TMP15]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP22]], [[PRED_SDIV_IF3]] ] -; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP25:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP5]], <2 x i32> [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4, !alias.scope !20, !noalias !23 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4, !alias.scope [[META20]], !noalias [[META23]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP27]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] @@ -448,12 +448,12 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23 -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META23:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META20]], !noalias [[META23]] ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !23 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !23 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META23]] +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META23]] ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23 ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100 @@ -476,8 +476,8 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; UNROLL-NO-VF-NEXT: [[TMP23:%.*]] = xor i1 [[TMP13]], true ; UNROLL-NO-VF-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP22]], i32 [[TMP10]], i32 [[TMP17]] ; UNROLL-NO-VF-NEXT: [[PREDPHI4:%.*]] = select i1 [[TMP23]], i32 [[TMP11]], i32 [[TMP21]] -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23 +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META20]], !noalias [[META23]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope [[META20]], !noalias [[META23]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; UNROLL-NO-VF-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] @@ -552,15 +552,15 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META29:![0-9]+]], !noalias [[META32:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope !32 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP6]], , !dbg [[DBG34:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP8]], <2 x i1> [[TMP7]], <2 x i1> zeroinitializer, !dbg [[DBG35:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META32]] +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], splat (i32 23) +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 100) +; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true), !dbg [[DBG34:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], splat (i32 200) +; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer, !dbg [[DBG35:![0-9]+]] ; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i1> [[TMP9]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] @@ -586,11 +586,11 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; CHECK-NEXT: br label [[PRED_SDIV_CONTINUE4]] ; CHECK: pred.sdiv.continue4: ; CHECK-NEXT: [[TMP28:%.*]] = phi <2 x i32> [ [[TMP19]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP26]], [[PRED_SDIV_IF3]] ] -; CHECK-NEXT: [[TMP29:%.*]] = xor <2 x i1> [[TMP7]], , !dbg [[DBG35]] -; CHECK-NEXT: [[TMP30:%.*]] = select <2 x i1> [[TMP8]], <2 x i1> [[TMP29]], <2 x i1> zeroinitializer, !dbg [[DBG35]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[TMP8]], splat (i1 true), !dbg [[DBG35]] +; CHECK-NEXT: [[TMP30:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP27]], <2 x i1> zeroinitializer, !dbg [[DBG35]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[TMP5]], <2 x i32> [[TMP28]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope !29, !noalias !32 +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope [[META29]], !noalias [[META32]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP32]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] @@ -636,22 +636,22 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32 -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META29:![0-9]+]], !noalias [[META32:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META29]], !noalias [[META32]] ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !32 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !32 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META32]] +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META32]] ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23 ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100 ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP5]], 100 -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true, !dbg [[DBG34:![0-9]+]] -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true, !dbg [[DBG34]] -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp sge i32 [[TMP4]], 200 -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp sge i32 [[TMP5]], 200 -; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = select i1 [[TMP16]], i1 [[TMP14]], i1 false, !dbg [[DBG35:![0-9]+]] -; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = select i1 [[TMP17]], i1 [[TMP15]], i1 false, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP12]], true, !dbg [[DBG34:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true, !dbg [[DBG34]] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp sge i32 [[TMP4]], 200, !dbg [[DBG34]] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp sge i32 [[TMP5]], 200, !dbg [[DBG34]] +; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = select i1 [[TMP14]], i1 [[TMP16]], i1 false, !dbg [[DBG35:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = select i1 [[TMP15]], i1 [[TMP17]], i1 false, !dbg [[DBG35]] ; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP12]] ; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = or i1 [[TMP19]], [[TMP13]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP20]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] @@ -668,14 +668,14 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.continue3: ; UNROLL-NO-VF-NEXT: [[TMP29:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP27]], [[PRED_SDIV_IF2]] ] -; UNROLL-NO-VF-NEXT: [[TMP30:%.*]] = xor i1 [[TMP14]], true, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP31:%.*]] = xor i1 [[TMP15]], true, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP32:%.*]] = select i1 [[TMP16]], i1 [[TMP30]], i1 false, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP33:%.*]] = select i1 [[TMP17]], i1 [[TMP31]], i1 false, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP28:%.*]] = xor i1 [[TMP16]], true, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP30:%.*]] = xor i1 [[TMP17]], true, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP32:%.*]] = select i1 [[TMP14]], i1 [[TMP28]], i1 false, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i1 [[TMP30]], i1 false, !dbg [[DBG35]] ; UNROLL-NO-VF-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP32]], i32 [[TMP10]], i32 [[TMP25]] ; UNROLL-NO-VF-NEXT: [[PREDPHI4:%.*]] = select i1 [[TMP33]], i32 [[TMP11]], i32 [[TMP29]] -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32 -; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32 +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META29]], !noalias [[META32]] +; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope [[META29]], !noalias [[META32]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; UNROLL-NO-VF-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll b/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll index 3496ca343023e4..9882013e726ef3 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll @@ -17,10 +17,10 @@ for.cond.cleanup: ; preds = %if.end ; CHECK-LABEL: test ; CHECK: vector.body: -; CHECK: %{{.*}} = sdiv <2 x i32> %{{.*}}, -; CHECK: %{{.*}} = udiv <2 x i32> %{{.*}}, -; CHECK: %{{.*}} = srem <2 x i32> %{{.*}}, -; CHECK: %{{.*}} = urem <2 x i32> %{{.*}}, +; CHECK: %{{.*}} = sdiv <2 x i32> %{{.*}}, splat (i32 11) +; CHECK: %{{.*}} = udiv <2 x i32> %{{.*}}, splat (i32 13) +; CHECK: %{{.*}} = srem <2 x i32> %{{.*}}, splat (i32 17) +; CHECK: %{{.*}} = urem <2 x i32> %{{.*}}, splat (i32 19) ; CHECK-NOT: %{{.*}} = sdiv <2 x i32> %{{.*}}, ; CHECK-NOT: %{{.*}} = udiv <2 x i32> %{{.*}}, ; CHECK-NOT: %{{.*}} = srem <2 x i32> %{{.*}}, diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 255768d8794c36..01eb535e6586cf 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -99,7 +99,7 @@ define i32 @test(ptr nocapture %f) #0 { ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 [[TMP0]] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; VEC-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], +; VEC-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], splat (i32 100) ; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; VEC-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VEC: pred.store.if: @@ -350,8 +350,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.continue2: -; VEC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[VEC_PHI]], -; VEC-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; VEC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 1) +; VEC-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; VEC-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP17]], <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP16]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index 330cdeaeb7c275..220e62d9e37300 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -1216,13 +1216,13 @@ define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP9]] ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[WIDE_LOAD]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]] @@ -1342,12 +1342,12 @@ define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonl ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP6]] ; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] @@ -1604,7 +1604,7 @@ define i64 @fcmp_0_add_select2(ptr noalias %x, i64 %N) nounwind readonly { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[VEC_PHI]], splat (i64 2) ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP4]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1686,7 +1686,7 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[VEC_PHI]], splat (i32 2) ; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1764,7 +1764,7 @@ define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], splat (i32 2) ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 069cb1f7cad7b9..ecb00d47244887 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -208,7 +208,7 @@ for.end: ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; CHECK-NEXT: store <8 x i64> [[VEC_IND]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label %middle.block, label [[VECTOR_BODY]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 8bdba25b1b761e..d6eb4264c0a0d7 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -31,7 +31,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -72,7 +72,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; IND-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IND: middle.block: @@ -110,13 +110,13 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL: middle.block: @@ -155,7 +155,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]] ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 @@ -163,7 +163,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP5]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -201,13 +201,13 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; INTERLEAVE: middle.block: @@ -891,7 +891,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[VEC_PHI]], +; CHECK-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP18:%.*]] = fadd fast <2 x float> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19]] = fadd fast <2 x float> [[TMP18]], [[TMP16]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -951,7 +951,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; IND-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 ; IND-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i64 0 ; IND-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i64 1 -; IND-NEXT: [[TMP16:%.*]] = fadd fast <2 x float> [[VEC_PHI]], +; IND-NEXT: [[TMP16:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00) ; IND-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[TMP16]], [[TMP9]] ; IND-NEXT: [[TMP18]] = fadd fast <2 x float> [[TMP17]], [[TMP15]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -1026,8 +1026,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP21]], align 4 ; UNROLL-NEXT: [[TMP28:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0 ; UNROLL-NEXT: [[TMP29:%.*]] = insertelement <2 x float> [[TMP28]], float [[TMP27]], i64 1 -; UNROLL-NEXT: [[TMP30:%.*]] = fadd fast <2 x float> [[VEC_PHI]], -; UNROLL-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], +; UNROLL-NEXT: [[TMP30:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00) +; UNROLL-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], splat (float 1.000000e+00) ; UNROLL-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[TMP30]], [[TMP13]] ; UNROLL-NEXT: [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP17]] ; UNROLL-NEXT: [[TMP34]] = fadd fast <2 x float> [[TMP32]], [[TMP25]] @@ -1107,8 +1107,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load float, ptr [[TMP22]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP28]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI]], -; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], +; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00) +; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], splat (float 1.000000e+00) ; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = fadd fast <2 x float> [[TMP32]], [[TMP18]] ; UNROLL-NO-IC-NEXT: [[TMP35]] = fadd fast <2 x float> [[TMP33]], [[TMP26]] @@ -1174,8 +1174,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; INTERLEAVE-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x float> [[WIDE_VEC4]], <32 x float> poison, <4 x i32> ; INTERLEAVE-NEXT: [[WIDE_VEC6:%.*]] = load <32 x float>, ptr [[TMP9]], align 4 ; INTERLEAVE-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <32 x float> [[WIDE_VEC6]], <32 x float> poison, <4 x i32> -; INTERLEAVE-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_PHI]], -; INTERLEAVE-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], +; INTERLEAVE-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_PHI]], splat (float 1.000000e+00) +; INTERLEAVE-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], splat (float 1.000000e+00) ; INTERLEAVE-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[TMP10]], [[STRIDED_VEC]] ; INTERLEAVE-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[TMP11]], [[STRIDED_VEC3]] ; INTERLEAVE-NEXT: [[TMP14]] = fadd fast <4 x float> [[TMP12]], [[STRIDED_VEC5]] @@ -1574,7 +1574,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 @@ -1586,7 +1586,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -1637,7 +1637,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; IND-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 1 -; IND-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], +; IND-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; IND-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i64 0 ; IND-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; IND-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i64 1 @@ -1649,7 +1649,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; IND-NEXT: store i32 [[TMP15]], ptr [[TMP17]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; IND-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IND: middle.block: @@ -1702,9 +1702,9 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP10:%.*]] = or disjoint i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 3 -; UNROLL-NEXT: [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], -; UNROLL-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], -; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[STEP_ADD]], +; UNROLL-NEXT: [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; UNROLL-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 2) +; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 8) ; UNROLL-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i64 0 ; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; UNROLL-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i64 1 @@ -1726,7 +1726,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NEXT: store i32 [[TMP24]], ptr [[TMP28]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NEXT: store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; UNROLL-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL: middle.block: @@ -1777,13 +1777,13 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = shl nsw <2 x i64> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl nsw <2 x i64> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 @@ -1805,7 +1805,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NO-IC-NEXT: store i32 [[TMP26]], ptr [[TMP30]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -1989,7 +1989,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP14:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP13]] ; CHECK-NEXT: [[TMP15]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -2239,8 +2239,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; UNROLL-NO-IC: pred.udiv.continue8: ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = phi <2 x i32> [ [[TMP20]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP25]], [[PRED_UDIV_IF7]] ] -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], -; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP27]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]] ; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[TMP28]], <2 x i32> [[WIDE_LOAD2]], <2 x i32> [[TMP26]] ; UNROLL-NO-IC-NEXT: [[TMP29]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] @@ -2465,7 +2465,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1 ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -2513,7 +2513,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; IND-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i64 1 ; IND-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IND: middle.block: @@ -2551,7 +2551,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP3:%.*]] = or disjoint i64 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 3 @@ -2572,7 +2572,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP9]], i64 1 ; UNROLL-NEXT: store i16 [[TMP17]], ptr [[TMP13]], align 2 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL: middle.block: @@ -2611,7 +2611,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 @@ -2633,7 +2633,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1 ; UNROLL-NO-IC-NEXT: store i16 [[TMP18]], ptr [[TMP14]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -2671,7 +2671,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = or disjoint i64 [[INDEX]], 1 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 3 @@ -2708,7 +2708,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <4 x i16> [[TMP13]], i64 3 ; INTERLEAVE-NEXT: store i16 [[TMP29]], ptr [[TMP21]], align 2 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; INTERLEAVE: middle.block: @@ -2765,7 +2765,7 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -2833,9 +2833,9 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], -; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], +; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -2902,7 +2902,7 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -2970,9 +2970,9 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], -; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], +; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -3043,7 +3043,7 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] @@ -3111,9 +3111,9 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], -; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], +; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] @@ -3194,7 +3194,7 @@ define i32 @testoverflowcheck() { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -3323,14 +3323,14 @@ define i32 @testoverflowcheck() { ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> , i32 [[C_PROMOTED_I]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP5]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP6]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -3465,7 +3465,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; CHECK: middle.block: @@ -3530,7 +3530,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IND: middle.block: @@ -3590,7 +3590,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64 @@ -3599,7 +3599,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL: middle.block: @@ -3663,7 +3663,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i8 [[OFFSET_IDX]], 0 @@ -3673,7 +3673,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP14]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -3732,7 +3732,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; INTERLEAVE-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64 @@ -3741,7 +3741,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; INTERLEAVE: middle.block: @@ -3845,7 +3845,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: middle.block: @@ -3913,7 +3913,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; IND-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; IND: middle.block: @@ -3976,7 +3976,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; UNROLL-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64 @@ -3985,7 +3985,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 16) ; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL: middle.block: @@ -4052,7 +4052,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add i8 [[OFFSET_IDX]], 0 @@ -4062,7 +4062,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP16]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 8) ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4124,7 +4124,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 16) ; INTERLEAVE-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; INTERLEAVE-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64 @@ -4133,7 +4133,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 32) ; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; INTERLEAVE: middle.block: @@ -4211,7 +4211,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; CHECK: middle.block: @@ -4244,7 +4244,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; IND: middle.block: @@ -4274,14 +4274,14 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL: middle.block: @@ -4312,7 +4312,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -4320,7 +4320,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4349,14 +4349,14 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; INTERLEAVE: middle.block: @@ -4416,7 +4416,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; CHECK: middle.block: @@ -4454,7 +4454,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; IND: middle.block: @@ -4489,7 +4489,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 ; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30 ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] @@ -4497,7 +4497,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL: middle.block: @@ -4537,7 +4537,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 0 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP6]] @@ -4546,7 +4546,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP9]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4579,7 +4579,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] @@ -4587,7 +4587,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; INTERLEAVE: middle.block: @@ -4650,7 +4650,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; CHECK: middle.block: @@ -4689,7 +4689,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; IND: middle.block: @@ -4724,7 +4724,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] @@ -4732,7 +4732,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL: middle.block: @@ -4768,7 +4768,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP1]] @@ -4777,7 +4777,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -4811,7 +4811,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] @@ -4819,7 +4819,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; INTERLEAVE: middle.block: @@ -4873,7 +4873,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; CHECK: middle.block: @@ -4911,7 +4911,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; IND-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP0]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; IND-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; IND: middle.block: @@ -4946,13 +4946,13 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP0]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP1]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8) ; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; UNROLL: middle.block: @@ -4988,7 +4988,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 @@ -4996,7 +4996,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5031,13 +5031,13 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP0]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP1]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 16) ; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; INTERLEAVE: middle.block: @@ -5098,7 +5098,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; CHECK: pred.urem.if: @@ -5121,7 +5121,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; CHECK-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; CHECK: middle.block: @@ -5185,7 +5185,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; IND-NEXT: [[TMP13:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; IND-NEXT: [[TMP14]] = or <2 x i32> [[VEC_PHI]], [[TMP13]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; IND: middle.block: @@ -5216,7 +5216,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE7]] ] ; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 ; UNROLL-NEXT: [[TMP2:%.*]] = icmp ne <2 x i16> [[VEC_IND]], zeroinitializer -; UNROLL-NEXT: [[TMP3:%.*]] = icmp ne <2 x i16> [[VEC_IND]], +; UNROLL-NEXT: [[TMP3:%.*]] = icmp ne <2 x i16> [[VEC_IND]], splat (i16 -2) ; UNROLL-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0 ; UNROLL-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; UNROLL: pred.urem.if: @@ -5260,7 +5260,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NEXT: [[TMP26]] = or <2 x i32> [[VEC_PHI]], [[TMP24]] ; UNROLL-NEXT: [[TMP27]] = or <2 x i32> [[VEC_PHI1]], [[TMP25]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 4) ; UNROLL-NEXT: [[TMP28:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; UNROLL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL: middle.block: @@ -5290,13 +5290,13 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE7]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UREM_CONTINUE7]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE7]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i16> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[STEP_ADD]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; UNROLL-NO-IC: pred.urem.if: @@ -5340,7 +5340,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]] ; UNROLL-NO-IC-NEXT: [[TMP29]] = or <2 x i32> [[VEC_PHI1]], [[TMP27]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], splat (i16 2) ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5384,7 +5384,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE15]] ] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp ne <4 x i16> [[VEC_IND]], zeroinitializer -; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ne <4 x i16> [[VEC_IND]], +; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ne <4 x i16> [[VEC_IND]], splat (i16 -4) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 ; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; INTERLEAVE: pred.urem.if: @@ -5464,7 +5464,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; INTERLEAVE-NEXT: [[TMP46]] = or <4 x i32> [[VEC_PHI]], [[TMP44]] ; INTERLEAVE-NEXT: [[TMP47]] = or <4 x i32> [[VEC_PHI1]], [[TMP45]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 8) ; INTERLEAVE-NEXT: [[TMP48:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; INTERLEAVE-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; INTERLEAVE: middle.block: @@ -5541,19 +5541,19 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]] ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> ; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], -; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) +; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; CHECK: middle.block: @@ -5605,19 +5605,19 @@ define i64 @trunc_with_first_order_recurrence() { ; IND-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] ; IND-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> ; IND-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]] -; IND-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], +; IND-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42) ; IND-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]] ; IND-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]] ; IND-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> ; IND-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]] -; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], +; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1) ; IND-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]] ; IND-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> ; IND-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], -; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) +; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2) ; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; IND: middle.block: @@ -5666,14 +5666,14 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND5:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], -; UNROLL-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2) ; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> ; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD7]], <2 x i32> ; UNROLL-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND3]] ; UNROLL-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD7]] -; UNROLL-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], -; UNROLL-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], +; UNROLL-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], splat (i32 42) +; UNROLL-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], splat (i32 42) ; UNROLL-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND3]] ; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD7]] ; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]] @@ -5682,9 +5682,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64> ; UNROLL-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]] ; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]] -; UNROLL-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], -; UNROLL-NEXT: [[STEP_ADD8:%.*]] = shl <2 x i32> [[VEC_IND5]], -; UNROLL-NEXT: [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD8]], +; UNROLL-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1) +; UNROLL-NEXT: [[STEP_ADD8:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1) +; UNROLL-NEXT: [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD8]], splat (i32 4) ; UNROLL-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]] ; UNROLL-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]] ; UNROLL-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64> @@ -5692,9 +5692,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] ; UNROLL-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], -; UNROLL-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], -; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; UNROLL-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], splat (i32 4) +; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], splat (i32 4) ; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; UNROLL: middle.block: @@ -5744,15 +5744,15 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND5:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], -; UNROLL-NO-IC-NEXT: [[STEP_ADD8:%.*]] = add <2 x i32> [[VEC_IND5]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[STEP_ADD8:%.*]] = add <2 x i32> [[VEC_IND5]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD7]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND3]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD7]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], splat (i32 42) +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], splat (i32 42) ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND3]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD7]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]] @@ -5761,8 +5761,8 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64> ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]] -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD8]], +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1) +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD8]], splat (i32 1) ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]] ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64> @@ -5770,9 +5770,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] ; UNROLL-NO-IC-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -5824,14 +5824,14 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], -; INTERLEAVE-NEXT: [[STEP_ADD7]] = add <4 x i32> [[VEC_IND3]], +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; INTERLEAVE-NEXT: [[STEP_ADD7]] = add <4 x i32> [[VEC_IND3]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND3]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND3]], <4 x i32> [[STEP_ADD7]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[VEC_IND]], [[VEC_IND3]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[STEP_ADD]], [[STEP_ADD7]] -; INTERLEAVE-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], -; INTERLEAVE-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], +; INTERLEAVE-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], splat (i32 42) +; INTERLEAVE-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], splat (i32 42) ; INTERLEAVE-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP0]], [[VEC_IND3]] ; INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP1]], [[STEP_ADD7]] ; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP6]], [[TMP4]] @@ -5840,9 +5840,9 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[TMP11:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64> ; INTERLEAVE-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_PHI]], [[TMP10]] ; INTERLEAVE-NEXT: [[TMP13:%.*]] = add <4 x i64> [[VEC_PHI2]], [[TMP11]] -; INTERLEAVE-NEXT: [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND5]], -; INTERLEAVE-NEXT: [[STEP_ADD8:%.*]] = shl <4 x i32> [[VEC_IND5]], -; INTERLEAVE-NEXT: [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD8]], +; INTERLEAVE-NEXT: [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND5]], splat (i32 1) +; INTERLEAVE-NEXT: [[STEP_ADD8:%.*]] = shl <4 x i32> [[VEC_IND5]], splat (i32 1) +; INTERLEAVE-NEXT: [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD8]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP4]], [[TMP14]] ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP5]], [[TMP15]] ; INTERLEAVE-NEXT: [[TMP18:%.*]] = sext <4 x i32> [[TMP16]] to <4 x i64> @@ -5850,9 +5850,9 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[TMP20]] = add <4 x i64> [[TMP12]], [[TMP18]] ; INTERLEAVE-NEXT: [[TMP21]] = add <4 x i64> [[TMP13]], [[TMP19]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; INTERLEAVE-NEXT: [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], -; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) +; INTERLEAVE-NEXT: [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], splat (i32 8) +; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 ; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] ; INTERLEAVE: middle.block: @@ -5944,7 +5944,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; CHECK: middle.block: @@ -5992,7 +5992,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; IND-NEXT: [[TMP5:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP2]] ; IND-NEXT: store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4 ; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; IND-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IND-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; IND: middle.block: @@ -6013,7 +6013,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> ; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> ; UNROLL-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC:%.*]], align 4 @@ -6030,7 +6030,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP6]], align 4 ; UNROLL-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; UNROLL: middle.block: @@ -6051,7 +6051,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], +; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> @@ -6069,7 +6069,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP10]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP9]], ptr [[TMP11]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -6106,7 +6106,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC:%.*]], align 4 @@ -6123,7 +6123,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; INTERLEAVE-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP6]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP9]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 ; INTERLEAVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] ; INTERLEAVE: middle.block: @@ -6435,7 +6435,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]] ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> , [[BROADCAST_SPLAT]] +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul <2 x i32> , [[DOTSPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/induction_plus.ll b/llvm/test/Transforms/LoopVectorize/induction_plus.ll index 36f8dec3b63744..a57b3fad0440ca 100644 --- a/llvm/test/Transforms/LoopVectorize/induction_plus.ll +++ b/llvm/test/Transforms/LoopVectorize/induction_plus.ll @@ -13,7 +13,7 @@ define void @array_at_plus_one(i32 %n) { ; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds [1024 x i32], ptr @array, i64 0, i64 [[T2]] ; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, ptr [[GEP]], i32 0 ; CHECK-NEXT: store <4 x i32> [[VEC_IV_TRUNC]], ptr [[GEP0]] -; CHECK: [[VEC_IV_TRUNC_NEXT]] = add <4 x i32> [[VEC_IV_TRUNC]], +; CHECK: [[VEC_IV_TRUNC_NEXT]] = add <4 x i32> [[VEC_IV_TRUNC]], splat (i32 4) ; CHECK: ret void ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll index 553fc374e0fdf2..1f5d80a670a2d8 100644 --- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll @@ -14,7 +14,7 @@ define i32 @one_direct_branch(ptr %src) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> , [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -72,7 +72,7 @@ define i32 @two_direct_branch(ptr %src) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> , [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -140,12 +140,12 @@ define i32 @cond_branch(i32 %a, ptr %src) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> , [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> , <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> splat (i32 10), <4 x i32> [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -210,7 +210,7 @@ define i32 @optimizable_trunc_used_outside() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll index 2b5e06c8c948be..a7f3057935b6de 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll @@ -58,7 +58,7 @@ define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapt ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shl <4 x i32> [[TMP23]], +; CHECK-NEXT: [[TMP24:%.*]] = shl <4 x i32> [[TMP23]], splat (i32 1) ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP25]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 54c08b47598e0f..dc4fb0f15c6447 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -112,10 +112,10 @@ define void @test_struct_array_load3_store3() { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x %struct.ST3], ptr @S, i64 0, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], splat (i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], splat (i32 3) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <12 x i32> @@ -267,11 +267,11 @@ define void @test_struct_store4(ptr noalias nocapture readonly %A, ptr noalias n ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], ptr [[B:%.*]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <16 x i32> @@ -362,7 +362,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -421,7 +421,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[INDEX]], 9223372036854775804 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP2]] ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP3]], align 4 @@ -497,7 +497,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP7]] ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4 @@ -612,7 +612,7 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture % ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP20]], i64 3 ; CHECK-NEXT: store i64 [[TMP28]], ptr [[TMP11]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -746,7 +746,7 @@ define void @mixed_load3_store3(ptr nocapture %A) { ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <12 x i32> ; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -1172,7 +1172,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0 @@ -1196,7 +1196,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) { ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1366,8 +1366,8 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or disjoint i64 [[TMP4]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP4]], 5 ; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[TMP4]], 7 -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1) +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] @@ -1402,7 +1402,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { ; CHECK-NEXT: store i32 [[Z]], ptr [[TMP11]], align 4 ; CHECK-NEXT: store i32 [[Z]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index 63381454cc5900..1473d292d06ac9 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -390,14 +390,14 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[GEP]], align 4, !alias.scope [[META23:![0-9]+]] ; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[TMP16]], +; CHECK-NEXT: [[TMP17]] = add <4 x i32> [[TMP16]], splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]]) -; CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4, !alias.scope [[META27:![0-9]+]], !noalias [[META23]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -415,7 +415,7 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ITR]], [[LFTR_WIDEIV]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: for.inc8.loopexit.loopexit: ; CHECK-NEXT: br label [[FOR_INC8_LOOPEXIT]] ; CHECK: for.inc8.loopexit: diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 7c610a568eafa3..10b6d1f7653da4 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -203,12 +203,12 @@ define ptr @both(i32 %k) { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END1]], %[[MIDDLE_BLOCK]] ], [ [[BASE]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], %[[MIDDLE_BLOCK]] ], [ [[BASE]], %[[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END2]], %[[MIDDLE_BLOCK]] ], [ undef, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[INC_LAG1:%.*]] = phi ptr [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[TMP:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INC_LAG1:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[TMP:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[INC_LAG2:%.*]] = phi ptr [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC_LAG1]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[TMP]] = getelementptr inbounds i32, ptr [[INC_LAG1]], i64 1 ; CHECK-NEXT: [[INC]] = add nsw i32 [[INC_PHI]], 1 @@ -308,21 +308,21 @@ define void @PR30742() { ; CHECK-NEXT: [[N_VEC7:%.*]] = sub i32 [[TMP4]], [[N_MOD_VF6]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[N_VEC7]], -8 ; CHECK-NEXT: [[IND_END8:%.*]] = add i32 [[TMP04]], [[TMP5]] -; CHECK-NEXT: br label %[[VECTOR_BODY9:.*]] -; CHECK: [[VECTOR_BODY9]]: -; CHECK-NEXT: [[INDEX10:%.*]] = phi i32 [ 0, %[[VECTOR_PH5]] ], [ [[INDEX_NEXT11:%.*]], %[[VECTOR_BODY9]] ] +; CHECK-NEXT: br label %[[VECTOR_BODY10:.*]] +; CHECK: [[VECTOR_BODY10]]: +; CHECK-NEXT: [[INDEX10:%.*]] = phi i32 [ 0, %[[VECTOR_PH5]] ], [ [[INDEX_NEXT11:%.*]], %[[VECTOR_BODY10]] ] ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i32 [[INDEX10]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT11]], [[N_VEC7]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK2:.*]], label %[[VECTOR_BODY9]], {{!llvm.loop ![0-9]+}} +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK2:.*]], label %[[VECTOR_BODY10]], {{!llvm.loop ![0-9]+}} ; CHECK: [[MIDDLE_BLOCK2]]: ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i32 [[TMP4]], [[N_VEC7]] ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END8]], -8 ; CHECK-NEXT: br i1 [[CMP_N12]], label %[[BB3:.*]], label %[[SCALAR_PH3]] ; CHECK: [[SCALAR_PH3]]: -; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i32 [ [[IND_END8]], %[[MIDDLE_BLOCK2]] ], [ [[TMP04]], %[[BB1]] ] +; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i32 [ [[IND_END8]], %[[MIDDLE_BLOCK2]] ], [ [[TMP04]], %[[BB1]] ] ; CHECK-NEXT: br label %[[BB2:.*]] ; CHECK: [[BB2]]: -; CHECK-NEXT: [[TMP05:%.*]] = phi i32 [ [[BC_RESUME_VAL13]], %[[SCALAR_PH3]] ], [ [[TMP06:%.*]], %[[BB2]] ] +; CHECK-NEXT: [[TMP05:%.*]] = phi i32 [ [[BC_RESUME_VAL9]], %[[SCALAR_PH3]] ], [ [[TMP06:%.*]], %[[BB2]] ] ; CHECK-NEXT: [[TMP06]] = add i32 [[TMP05]], -8 ; CHECK-NEXT: [[TMP07:%.*]] = icmp sgt i32 [[TMP06]], 0 ; CHECK-NEXT: br i1 [[TMP07]], label %[[BB2]], label %[[BB3]], {{!llvm.loop ![0-9]+}} @@ -411,7 +411,7 @@ define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; VEC-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1002 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: @@ -463,7 +463,7 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; VEC-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1002 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll index 327ffaad63a654..0e7bb763237017 100644 --- a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll @@ -21,14 +21,14 @@ define void @accesses_to_struct_dereferenceable(ptr noalias %dst) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> [[WIDE_LOAD2]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 @@ -106,7 +106,7 @@ define void @accesses_to_struct_may_not_be_dereferenceable_due_to_loop_bound(ptr ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -228,7 +228,7 @@ define void @accesses_to_struct_may_not_be_dereferenceable_access_size(ptr noali ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 80d705cbf9b9f8..b59cb66b11acf5 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -77,7 +77,7 @@ define void @bottom_tested(ptr %p, i32 %n) { ; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] ; TAILFOLD: pred.store.continue2: ; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TAILFOLD: middle.block: @@ -199,7 +199,7 @@ define i32 @early_exit_with_live_out(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <2 x i32> , ptr [[TMP2]], align 4 +; CHECK-NEXT: store <2 x i32> splat (i32 10), ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -847,8 +847,8 @@ define i32 @multiple_exit_switch(ptr %p, i32 %n) { ; CHECK-NEXT: store i16 0, ptr [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ -; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] -; CHECK-NEXT: i32 2097, label [[IF_END]] +; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] +; CHECK-NEXT: i32 2097, label [[IF_END]] ; CHECK-NEXT: ] ; CHECK: if.end: ; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] @@ -864,8 +864,8 @@ define i32 @multiple_exit_switch(ptr %p, i32 %n) { ; TAILFOLD-NEXT: store i16 0, ptr [[B]], align 4 ; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ -; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] -; TAILFOLD-NEXT: i32 2097, label [[IF_END]] +; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] +; TAILFOLD-NEXT: i32 2097, label [[IF_END]] ; TAILFOLD-NEXT: ] ; TAILFOLD: if.end: ; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] @@ -902,8 +902,8 @@ define i32 @multiple_exit_switch2(ptr %p, i32 %n) { ; CHECK-NEXT: store i16 0, ptr [[B]], align 4 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ -; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] -; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] +; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] +; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] ; CHECK-NEXT: ] ; CHECK: if.end: ; CHECK-NEXT: ret i32 0 @@ -920,8 +920,8 @@ define i32 @multiple_exit_switch2(ptr %p, i32 %n) { ; TAILFOLD-NEXT: store i16 0, ptr [[B]], align 4 ; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ -; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] -; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] +; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] +; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] ; TAILFOLD-NEXT: ] ; TAILFOLD: if.end: ; TAILFOLD-NEXT: ret i32 0 @@ -1090,7 +1090,7 @@ define void @scalar_predication(ptr %addr) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll index de298d20fc382c..0bffac24227571 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll @@ -21,7 +21,7 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll index 0515e6f07bf015..c3d8a75acaaaf7 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll @@ -30,7 +30,7 @@ define void @maxvf3() { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 14) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -47,7 +47,7 @@ define void @maxvf3() { ; CHECK-NEXT: store i8 69, ptr [[TMP6]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: -; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> , [[VEC_IND]] +; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> splat (i32 3), [[VEC_IND]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: @@ -65,7 +65,7 @@ define void @maxvf3() { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll index 80348f9e46130f..ca0edb3e1a46d6 100644 --- a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll +++ b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll @@ -26,7 +26,7 @@ define i32 @main() #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [40000 x i8], ptr addrspace(1) @Y, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr addrspace(1) [[TMP0]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [40000 x i8], ptr @X, i64 0, i64 [[INDEX]] ; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll index d462d3aa650d28..7a3f9df6ec2821 100644 --- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll +++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll @@ -45,7 +45,7 @@ define i32 @test(ptr %arr, i64 %n) { ; CHECK-NEXT: [[TMP17:%.*]] = add nsw i64 [[TMP13]], -1 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP19]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 65), ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll index ad0af978f07dd1..c45d05e21a1380 100644 --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -23,7 +23,7 @@ define i32 @test1() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -35,21 +35,21 @@ define i32 @test1() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -96,7 +96,7 @@ define i32 @test2() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -108,21 +108,21 @@ define i32 @test2() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> [[VEC_IND]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> [[VEC_IND]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -169,7 +169,7 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -183,25 +183,25 @@ define i32 @test3(i32 %N) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP4]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> , <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 2), <2 x i32> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -258,7 +258,7 @@ define i32 @test4(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -270,21 +270,21 @@ define i32 @test4(i32 %N) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -520,7 +520,7 @@ define i8 @outside_user_non_phi() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -532,22 +532,22 @@ define i8 @outside_user_non_phi() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> , <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[PREDPHI]] to <2 x i8> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -651,14 +651,14 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]] ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -685,12 +685,12 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 @@ -740,15 +740,15 @@ define i32 @non_uniform_live_out() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 7) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store <2 x i8> [[TMP4]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 26a2fb3806d3e7..305906d9d1ef1d 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -189,7 +189,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], +; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP16]], ptr [[TMP18]], align 4 @@ -223,7 +223,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x float> [[WIDE_LOAD9]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], +; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0 ; CHECK-NEXT: store <4 x float> [[TMP28]], ptr [[TMP30]], align 4 @@ -334,7 +334,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <4 x i8> , ptr [[TMP2]], align 1 +; CHECK-NEXT: store <4 x i8> splat (i8 1), ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -355,7 +355,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i8> , ptr [[TMP6]], align 1 +; CHECK-NEXT: store <4 x i8> splat (i8 1), ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -393,7 +393,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> , ptr [[TMP2]], align 1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> splat (i8 1), ptr [[TMP2]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -414,7 +414,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> , ptr [[TMP6]], align 1 +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> splat (i8 1), ptr [[TMP6]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 2 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -685,7 +685,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -714,7 +714,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i8> [[TMP7]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], +; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], splat (i32 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -756,7 +756,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: middle.block: @@ -785,7 +785,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[TMP7]], ptr [[TMP9]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], splat (i32 2) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index 27e5889356b092..0f5dcca81d3718 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -232,7 +232,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024) ; CHECK-NEXT: [[TMP0:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -251,7 +251,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll index b6f2ae3c14893a..1cf410c359f06d 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll @@ -28,21 +28,21 @@ define void @test(ptr %src, i64 %n) { ; CHECK: loop.32: ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, [[LOOP_2_HEADER1]] ], [ [[TMP2:%.*]], [[LOOP_32]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2000 x i32], ptr [[SRC:%.*]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI3]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_GATHER]], -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[TMP2]] = add nuw nsw <4 x i64> [[VEC_PHI3]], +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 10) +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP2]] = add nuw nsw <4 x i64> [[VEC_PHI3]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_2_LATCH4]], label [[LOOP_32]] ; CHECK: loop.2.latch4: -; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_1_LATCH5]], label [[LOOP_2_HEADER1]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -73,7 +73,7 @@ define void @test(ptr %src, i64 %n) { ; CHECK: loop.1.latch: ; CHECK-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 ; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT]], label [[LOOP_1_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT]], label [[LOOP_1_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll index 5df4fce2190a3a..ae8f0f61231cc5 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll @@ -41,19 +41,19 @@ define void @non_outermost_loop_hcfg_construction(i64 %n, ptr %a) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[MIDDLE_LOOP_LATCH4:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[MIDDLE_LOOP_LATCH4]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[A]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> , <4 x ptr> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: br label [[INNERMOST_LOOP1:%.*]] ; CHECK: innermost.loop1: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP5:%.*]], [[INNERMOST_LOOP1]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, <4 x ptr> [[WIDE_MASKED_GATHER]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT3]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_LATCH4]], label [[INNERMOST_LOOP1]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -187,16 +187,16 @@ define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(i64 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[INVARIANT_GEP]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> , <4 x ptr> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP3]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, <4 x ptr> [[WIDE_MASKED_GATHER]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP2]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP2]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT3]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_LOOP_J0_CLEANUP4]], label [[INNERMOST_LOOP1]] ; CHECK: vector.latch: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll index 40d0a51f9d5e87..80e7de71870b7c 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll @@ -23,7 +23,7 @@ ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] @@ -31,15 +31,15 @@ ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] ; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], -; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true)) +; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1) +; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8) ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8 ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll index 92a4ea2bbda70e..29e633316b1e45 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll @@ -23,11 +23,11 @@ ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [1024 x i32], ptr @A, i64 0, <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[CSplat]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[CSplat]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: br i1 %[[ZeroTripChk]], label %[[InnerForPh:.*]], label %[[OuterInc:.*]] ; CHECK: [[InnerForPh]]: -; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[AAddr]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: br label %[[InnerForBody:.*]] @@ -35,22 +35,22 @@ ; CHECK: %[[InnerInd:.*]] = phi <4 x i64> [ zeroinitializer, %[[InnerForPh]] ], [ %[[InnerIndNext:.*]], %[[InnerForBody]] ] ; CHECK: %[[AccumPhi:.*]] = phi <4 x i32> [ %[[WideAVal]], %[[InnerForPh]] ], [ %[[AccumPhiNext:.*]], %[[InnerForBody]] ] ; CHECK: %[[BAddr:.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, <4 x i64> %[[InnerInd]] -; CHECK: %[[WideBVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[BAddr]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK: %[[WideBVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[BAddr]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK: %[[Add1:.*]] = add nsw <4 x i32> %[[WideBVal]], %[[VecIndTr]] ; CHECK: %[[AccumPhiNext]] = add nsw <4 x i32> %[[Add1]], %[[AccumPhi]] -; CHECK: %[[InnerIndNext]] = add nuw nsw <4 x i64> %[[InnerInd]], +; CHECK: %[[InnerIndNext]] = add nuw nsw <4 x i64> %[[InnerInd]], splat (i64 1) ; CHECK: %[[InnerVecCond:.*]] = icmp eq <4 x i64> %[[InnerIndNext]], {{.*}} ; CHECK: %[[InnerCond:.+]] = extractelement <4 x i1> %[[InnerVecCond]], i32 0 ; CHECK: br i1 %[[InnerCond]], label %[[InnerCrit:.*]], label %[[InnerForBody]] ; CHECK: [[InnerCrit]]: ; CHECK: %[[StorePhi:.*]] = phi <4 x i32> [ %[[AccumPhiNext]], %[[InnerForBody]] ] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StorePhi]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StorePhi]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true)) ; CHECK: br label %[[ForInc]] ; CHECK: [[ForInc]]: ; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4 -; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], +; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4) ; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], {{.*}} ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/phi-cost.ll b/llvm/test/Transforms/LoopVectorize/phi-cost.ll index 4fe3f05c923330..b857385e38535e 100644 --- a/llvm/test/Transforms/LoopVectorize/phi-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/phi-cost.ll @@ -102,11 +102,11 @@ define void @phi_three_incoming_values(ptr noalias %a, ptr noalias %b, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> , <2 x i32> -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[PREDPHI]], <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 20) +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD2]], splat (i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 4), <2 x i32> splat (i32 5) +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> splat (i32 3) +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 9) ; CHECK-NEXT: store <2 x i32> [[PREDPHI3]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 41d9c4d84202c6..531164a2c5dd05 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -32,7 +32,7 @@ define void @a(ptr readnone %b) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -152,7 +152,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4 diff --git a/llvm/test/Transforms/LoopVectorize/pr35773.ll b/llvm/test/Transforms/LoopVectorize/pr35773.ll index c83a7f7705f08b..ce6ddb2569d456 100644 --- a/llvm/test/Transforms/LoopVectorize/pr35773.ll +++ b/llvm/test/Transforms/LoopVectorize/pr35773.ll @@ -20,8 +20,8 @@ define void @doit1(ptr %ptr) { ; CHECK-NEXT: store <4 x i32> [[I32_IV]], ptr [[GEP2]], align 4 ; CHECK-NEXT: [[MAIN_IV_NEXT]] = add nuw i32 [[MAIN_IV]], 4 -; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], -; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], +; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], splat (i32 36) +; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], splat (i8 36) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index 42c4373793aa62..9f8f618f095854 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -47,7 +47,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 ; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[TMP10]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll index d04cca0946a4f1..fd040ec937da7e 100644 --- a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll +++ b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll @@ -62,7 +62,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP20]], ptr [[TMP22]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll index f1075e31688cb3..e7b87b0aea5331 100644 --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -20,7 +20,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]] ; CHECK: pred.srem.if: @@ -37,7 +37,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]] ; CHECK: pred.srem.continue2: ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP6]], [[PRED_SREM_CONTINUE]] ], [ [[TMP9]], [[PRED_SREM_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> , <2 x i16> [[TMP10]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> splat (i16 5786), <2 x i16> [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 ; CHECK-NEXT: store i16 [[TMP11]], ptr @v_39, align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -64,7 +64,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: store i16 [[COND6]], ptr @v_39, align 1 ; CHECK-NEXT: [[INC7]] = add nsw i16 [[I_07]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC7]], 111 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[RV:%.*]] = load i16, ptr @v_39, align 1 ; CHECK-NEXT: ret i16 [[RV]] diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll index 008971697775e4..42bd14e4b752bc 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -41,7 +41,7 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[ARR]], i8 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp slt <4 x i8> [[TMP11]], [[BROADCAST_SPLAT]] @@ -49,7 +49,7 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0 ; CHECK-NEXT: store <4 x i8> [[TMP15]], ptr [[TMP16]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -66,7 +66,7 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK-NEXT: [[T3_I8:%.*]] = zext i1 [[T3_I]] to i8 ; CHECK-NEXT: store i8 [[T3_I8]], ptr [[PTR]], align 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[T1_0_LCSSA]], [[PTR]] -; CHECK-NEXT: br i1 [[EC]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i8 [ [[IV_NEXT]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i8 [[IV_NEXT_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/pr45525.ll b/llvm/test/Transforms/LoopVectorize/pr45525.ll index e52d98b63abfd1..25a32d9e3e32eb 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45525.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45525.ll @@ -10,7 +10,7 @@ define void @main(i1 %cond, ptr %arr) { ; CHECK: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK: [[TMP5:%.*]] = mul <4 x i32> [[VEC_IND]], +; CHECK: [[TMP5:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3) ; bb.0: br label %bb.1 diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index dc3480fbb11a88..ee6975fa0c9e6c 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -19,7 +19,7 @@ define void @pr45679(ptr %A) optsize { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 13) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -53,7 +53,7 @@ define void @pr45679(ptr %A) optsize { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -79,9 +79,9 @@ define void @pr45679(ptr %A) optsize { ; VF2UF2: vector.body: ; VF2UF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] ; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ] -; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], -; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], -; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], +; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 13) +; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], splat (i32 13) ; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF2UF2: pred.store.if: @@ -115,7 +115,7 @@ define void @pr45679(ptr %A) optsize { ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.continue6: ; VF2UF2-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], +; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; VF2UF2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2UF2: middle.block: @@ -214,7 +214,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 13) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -252,7 +252,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -279,9 +279,9 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2: vector.body: ; VF2UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] ; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ] -; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], -; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], +; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 13) +; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], splat (i64 13) ; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF2UF2: pred.store.if: @@ -319,7 +319,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.continue6: ; VF2UF2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; VF2UF2-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2UF2: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll index 5f2298514be50e..d7f5f646f1873c 100644 --- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll @@ -22,7 +22,7 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IV]], splat (i32 40) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -77,7 +77,7 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize { ; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[TMP29]], [[TMP30]] ; CHECK-NEXT: [[PREVSUM]] = add nsw i16 [[SUM]], [[ADD]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[IV]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[PREVSUM_LCSSA:%.*]] = phi i16 [ [[PREVSUM]], [[LOOP]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[PREVSUM_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index bc7e3c6c6e2510..4f47e66816c9a2 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -8,7 +8,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i64 0 @@ -18,19 +18,19 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[PREDPHI7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10) +; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], splat (i1 true) ; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> , <2 x i32> [[VEC_IND]] -; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> , <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 9), <2 x i32> [[VEC_IND]] +; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 9), <2 x i32> [[PREDPHI]] ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[PREDPHI7]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP3]], <2 x i32> [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -58,7 +58,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[P_2]] = phi i32 [ [[V_2]], [[LOOP_HEADER]] ], [ [[V_2_ADD]], [[BODY_1]] ], [ [[ADD_2]], [[BODY_2]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[IV]], 181 -; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[E_1:%.*]] = phi i32 [ [[P_1]], [[LOOP_LATCH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[E_2:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll index 2fb7f88e5341e5..c60b216f86fa8d 100644 --- a/llvm/test/Transforms/LoopVectorize/pr66616.ll +++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll @@ -15,7 +15,7 @@ define void @pr66616(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index e18b3d7477a16d..4019b34217f2d5 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -24,10 +24,10 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]] ; IC1-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1 -; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], -; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], +; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12) +; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13) ; IC1-NEXT: [[TMP11:%.*]] = or <2 x i1> [[TMP7]], [[TMP4]] -; IC1-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP11]], +; IC1-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true) ; IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; IC1-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; IC1: [[PRED_STORE_IF]]: @@ -124,14 +124,14 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 ; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1 ; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1 -; IC2-NEXT: [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], -; IC2-NEXT: [[TMP14:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], -; IC2-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], -; IC2-NEXT: [[TMP8:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], +; IC2-NEXT: [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12) +; IC2-NEXT: [[TMP14:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], splat (i8 -12) +; IC2-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13) +; IC2-NEXT: [[TMP8:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], splat (i8 13) ; IC2-NEXT: [[TMP21:%.*]] = or <2 x i1> [[TMP13]], [[TMP7]] ; IC2-NEXT: [[TMP22:%.*]] = or <2 x i1> [[TMP14]], [[TMP8]] -; IC2-NEXT: [[TMP19:%.*]] = xor <2 x i1> [[TMP21]], -; IC2-NEXT: [[TMP20:%.*]] = xor <2 x i1> [[TMP22]], +; IC2-NEXT: [[TMP19:%.*]] = xor <2 x i1> [[TMP21]], splat (i1 true) +; IC2-NEXT: [[TMP20:%.*]] = xor <2 x i1> [[TMP22]], splat (i1 true) ; IC2-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; IC2-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; IC2: [[PRED_STORE_IF]]: @@ -340,13 +340,13 @@ define void @switch_to_header(ptr %start) { ; IC1-NEXT: [[ENTRY:.*]]: ; IC1-NEXT: br label %[[LOOP_HEADER:.*]] ; IC1: [[LOOP_HEADER]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_HEADER_BACKEDGE:.*]] ] +; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] ; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC1-NEXT: i64 120, label %[[LOOP_HEADER_BACKEDGE]] +; IC1-NEXT: i64 120, label %[[IF_THEN1]] ; IC1-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC1-NEXT: ] -; IC1: [[LOOP_HEADER_BACKEDGE]]: +; IC1: [[IF_THEN1]]: ; IC1-NEXT: br label %[[LOOP_HEADER]] ; IC1: [[IF_THEN:.*:]] ; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison @@ -354,7 +354,7 @@ define void @switch_to_header(ptr %start) { ; IC1-NEXT: unreachable ; IC1: [[LOOP_LATCH]]: ; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP_HEADER_BACKEDGE]] +; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] ; IC1: [[EXIT]]: ; IC1-NEXT: ret void ; @@ -363,13 +363,13 @@ define void @switch_to_header(ptr %start) { ; IC2-NEXT: [[ENTRY:.*]]: ; IC2-NEXT: br label %[[LOOP_HEADER:.*]] ; IC2: [[LOOP_HEADER]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_HEADER_BACKEDGE:.*]] ] +; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] ; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC2-NEXT: i64 120, label %[[LOOP_HEADER_BACKEDGE]] +; IC2-NEXT: i64 120, label %[[IF_THEN1]] ; IC2-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC2-NEXT: ] -; IC2: [[LOOP_HEADER_BACKEDGE]]: +; IC2: [[IF_THEN1]]: ; IC2-NEXT: br label %[[LOOP_HEADER]] ; IC2: [[IF_THEN:.*:]] ; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison @@ -377,7 +377,7 @@ define void @switch_to_header(ptr %start) { ; IC2-NEXT: unreachable ; IC2: [[LOOP_LATCH]]: ; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP_HEADER_BACKEDGE]] +; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; @@ -417,7 +417,7 @@ define void @switch_all_to_default(ptr %start) { ; IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[TMP0]] ; IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; IC1-NEXT: store <2 x i64> , ptr [[TMP2]], align 1 +; IC1-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP2]], align 1 ; IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -453,8 +453,8 @@ define void @switch_all_to_default(ptr %start) { ; IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[TMP0]] ; IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; IC2-NEXT: store <2 x i64> , ptr [[TMP4]], align 1 -; IC2-NEXT: store <2 x i64> , ptr [[TMP5]], align 1 +; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP4]], align 1 +; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP5]], align 1 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; IC2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll index 66509ffedd68ba..2bf02b0b906f7e 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll @@ -18,8 +18,8 @@ define void @generate_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP3:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[WIDE_LOAD]], splat (i32 3) ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index 567eea8e2aeb82..240421341626ab 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -15,7 +15,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], splat (float 5.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -129,7 +129,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -169,7 +169,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> +; CHECK-NEXT: [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> splat (float 0x47EFFFFFE0000000) ; CHECK-NEXT: [[TMP26:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP25]]) ; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = call fast float @llvm.minnum.f32(float [[TMP26]], float [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -287,7 +287,7 @@ define i32 @conditional_and(ptr noalias %A, ptr noalias %B, i32 %cond, i64 nound ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP24]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP24]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP25]]) ; CHECK-NEXT: [[TMP27]] = and i32 [[TMP26]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -549,7 +549,7 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i64> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[TMP24]], +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq <4 x i64> [[TMP24]], splat (i64 3) ; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP25]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP26]], i64 0 ; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] @@ -588,7 +588,7 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap ; CHECK: pred.load.continue14: ; CHECK-NEXT: [[TMP46:%.*]] = phi <4 x i64> [ [[TMP41]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP45]], [[PRED_LOAD_IF13]] ] ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP26]], <4 x i64> [[TMP46]], <4 x i64> [[TMP24]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> splat (i64 -1) ; CHECK-NEXT: [[PREDPHI15]] = and <4 x i64> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -811,7 +811,7 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -850,7 +850,7 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP25:%.*]] = fadd fast <4 x float> [[TMP24]], [[VEC_PHI]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP25]], <4 x float> [[VEC_PHI]] -; CHECK-NEXT: [[TMP26:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP26:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], splat (float 7.000000e+00) ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP26]], i64 0 ; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] ; CHECK: pred.load.if7: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll index 1dfd8d93d2cf1f..3b07a76440e857 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll @@ -104,7 +104,7 @@ define i32 @reduction_smin_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) @@ -246,7 +246,7 @@ define i32 @reduction_umax_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index 89349314f27029..6771f561913130 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -13,7 +13,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -57,7 +57,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -100,7 +100,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -166,8 +166,8 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]]) ; CHECK-NEXT: [[TMP48]] = add i32 [[TMP47]], [[TMP45]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4) ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -214,7 +214,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -257,11 +257,11 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 3), <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]]) ; CHECK-NEXT: [[TMP29]] = add i32 [[TMP28]], [[TMP26]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -305,7 +305,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -361,18 +361,18 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK: pred.load.continue8: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]]) ; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) ; CHECK-NEXT: [[TMP45:%.*]] = mul i32 [[TMP44]], [[TMP42]] -; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]]) ; CHECK-NEXT: [[TMP48]] = mul i32 [[TMP47]], [[TMP45]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4) ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -420,7 +420,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE8]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -484,8 +484,8 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]]) ; CHECK-NEXT: [[TMP46]] = add i32 [[TMP45]], [[TMP43]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4) ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -532,7 +532,7 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -588,14 +588,14 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]]) ; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) ; CHECK-NEXT: [[TMP45]] = mul i32 [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -640,7 +640,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -696,14 +696,14 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP40]]) ; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]]) ; CHECK-NEXT: [[TMP45]] = and i32 [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -748,7 +748,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -809,7 +809,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]]) ; CHECK-NEXT: [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -854,7 +854,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -915,7 +915,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]]) ; CHECK-NEXT: [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -960,7 +960,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1021,7 +1021,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -1066,7 +1066,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1122,14 +1122,14 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> +; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP40]]) ; CHECK-NEXT: [[TMP42:%.*]] = fmul fast float [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> +; CHECK-NEXT: [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]]) ; CHECK-NEXT: [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -1174,7 +1174,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1214,11 +1214,11 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> splat (i32 2147483647) ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -1261,7 +1261,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1305,7 +1305,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -1354,16 +1354,16 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> , <4 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 @@ -1440,8 +1440,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1488,7 +1488,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32> ; CHECK-NEXT: [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1536,7 +1536,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -1583,7 +1583,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32> ; CHECK-NEXT: [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll index 99a0b708035884..f65f62bbf54bf2 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll @@ -79,13 +79,13 @@ define i32 @predicated(ptr noalias nocapture %A) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP104:%.*]], [[PRED_LOAD_CONTINUE36]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP107:%.*]], [[PRED_LOAD_CONTINUE36]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP110:%.*]], [[PRED_LOAD_CONTINUE36]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[STEP_ADD]], splat (i64 257) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i64> [[STEP_ADD_2]], splat (i64 257) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[STEP_ADD_3]], splat (i64 257) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -258,7 +258,7 @@ define i32 @predicated(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP109:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP108]]) ; CHECK-NEXT: [[TMP110]] = add i32 [[TMP109]], [[VEC_PHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP111:%.*]] = icmp eq i64 [[INDEX_NEXT]], 272 ; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -302,7 +302,7 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16 ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i64 [[N]], -1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[COND:%.*]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLATINSERT7]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLATINSERT7]], splat (i32 7) ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer @@ -314,9 +314,9 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP112:%.*]], [[PRED_LOAD_CONTINUE38]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[PRED_LOAD_CONTINUE38]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP118:%.*]], [[PRED_LOAD_CONTINUE38]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] @@ -484,20 +484,20 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE38]] ; CHECK: pred.load.continue35: ; CHECK-NEXT: [[TMP106:%.*]] = phi <4 x i32> [ [[TMP100]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP105]], [[PRED_LOAD_IF37]] ] -; CHECK-NEXT: [[TMP107:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP34]], <4 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP34]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP108:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP107]]) ; CHECK-NEXT: [[TMP109]] = mul i32 [[TMP108]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP110:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP58]], <4 x i32> +; CHECK-NEXT: [[TMP110:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP58]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP111:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP110]]) ; CHECK-NEXT: [[TMP112]] = mul i32 [[TMP111]], [[VEC_PHI4]] -; CHECK-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP82]], <4 x i32> +; CHECK-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP82]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP114:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP113]]) ; CHECK-NEXT: [[TMP115]] = mul i32 [[TMP114]], [[VEC_PHI5]] -; CHECK-NEXT: [[TMP116:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP106]], <4 x i32> +; CHECK-NEXT: [[TMP116:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP106]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP117:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP116]]) ; CHECK-NEXT: [[TMP118]] = mul i32 [[TMP117]], [[VEC_PHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP119:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index 7c5d6a1edf0b4b..fe74a7c3a9b27c 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -69,7 +69,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]]) ; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -174,7 +174,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]]) ; CHECK-NEXT: [[TMP7]] = mul i32 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -231,7 +231,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: [[TMP6]] = add i32 [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -689,16 +689,16 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> , <4 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 @@ -832,7 +832,7 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]]) ; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -926,7 +926,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i8> [ splat (i8 -1), [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4 @@ -1122,9 +1122,9 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], splat (i8 31) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], splat (i8 3) +; CHECK-NEXT: [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], splat (i8 31) ; CHECK-NEXT: [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI]] @@ -1207,9 +1207,9 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], splat (i8 31) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], splat (i8 3) +; CHECK-NEXT: [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], splat (i8 31) ; CHECK-NEXT: [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) @@ -1354,8 +1354,8 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP42]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <4 x i32> [[TMP43]], zeroinitializer -; CHECK-NEXT: [[NOT_:%.*]] = xor <4 x i1> [[TMP19]], -; CHECK-NEXT: [[DOTNOT7:%.*]] = select <4 x i1> [[NOT_]], <4 x i1> , <4 x i1> [[TMP44]] +; CHECK-NEXT: [[NOT_:%.*]] = xor <4 x i1> [[TMP19]], splat (i1 true) +; CHECK-NEXT: [[DOTNOT7:%.*]] = select <4 x i1> [[NOT_]], <4 x i1> splat (i1 true), <4 x i1> [[TMP44]] ; CHECK-NEXT: [[TMP45:%.*]] = bitcast <4 x i1> [[DOTNOT7]] to i4 ; CHECK-NEXT: [[TMP46:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP45]]) ; CHECK-NEXT: [[TMP47:%.*]] = zext nneg i4 [[TMP46]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-order.ll b/llvm/test/Transforms/LoopVectorize/reduction-order.ll index 7d8613b794f48c..1e3d6dd203f4da 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-order.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-order.ll @@ -9,8 +9,8 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; CHECK: [[VEC_PHI_1:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[ADD_5:%.+]], %vector.body ] ; CHECK: [[VEC_PHI_2:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[ADD_3:%.+]], %vector.body ] ; CHECK: icmp ule <4 x i64> -; CHECK-NEXT: [[ADD_3]] = add <4 x i32> , [[VEC_PHI_2]] -; CHECK-NEXT: [[ADD_5]] = add <4 x i32> [[VEC_PHI_1]], +; CHECK-NEXT: [[ADD_3]] = add <4 x i32> splat (i32 3), [[VEC_PHI_2]] +; CHECK-NEXT: [[ADD_5]] = add <4 x i32> [[VEC_PHI_1]], splat (i32 5) ; CHECK: select <4 x i1> {{.*}}, <4 x i32> [[ADD_5]], <4 x i32> ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> [[ADD_3]], <4 x i32> ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll index 7cc58edbcb408d..491a9ec0f0d4da 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -13,7 +13,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -56,7 +56,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -98,7 +98,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -159,7 +159,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP42:%.*]] = add <4 x i32> [[TMP41]], [[TMP39]] ; CHECK-NEXT: [[TMP43]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP42]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -204,8 +204,8 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -265,7 +265,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP39]] ; CHECK-NEXT: [[TMP42]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP41]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -309,8 +309,8 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -367,10 +367,10 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP40:%.*]] = and <4 x i32> [[TMP38]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP42]] = and <4 x i32> [[VEC_PHI]], [[TMP41]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -415,7 +415,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -475,7 +475,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42]] = or <4 x i32> [[VEC_PHI]], [[TMP41]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -520,7 +520,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -580,7 +580,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42]] = xor <4 x i32> [[VEC_PHI]], [[TMP41]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -625,7 +625,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -685,7 +685,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = fadd fast <4 x float> [[TMP40]], [[TMP39]] ; CHECK-NEXT: [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -730,7 +730,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -790,7 +790,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = fmul fast <4 x float> [[TMP40]], [[TMP39]] ; CHECK-NEXT: [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -834,8 +834,8 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -878,7 +878,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]) ; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -920,8 +920,8 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: @@ -964,7 +964,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]) ; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 81364f1fd6aa8e..8a8439fca439d0 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -17,7 +17,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8> ; CHECK-NEXT: [[TMP4]] = zext <4 x i8> [[TMP3]] to <4 x i32> @@ -93,10 +93,10 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> , [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> splat (i32 99), [[TMP0]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255) ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[PREDPHI]] ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8> ; CHECK-NEXT: [[TMP6]] = zext <4 x i8> [[TMP5]] to <4 x i32> @@ -175,8 +175,8 @@ define i32 @PR35734(i32 %x, i32 %y) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], splat (i32 -1) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1> ; CHECK-NEXT: [[TMP6]] = sext <4 x i1> [[TMP5]] to <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index 0d9918b74a2ff2..9d87a4c7035e1f 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -229,7 +229,7 @@ for.end: ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP11]], i32 3 ; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP17:%.*]] = or disjoint <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP17:%.*]] = or disjoint <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1 @@ -248,7 +248,7 @@ for.end: ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3 ; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP33]], [[TMP16]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -516,7 +516,7 @@ define void @test_drop_poison_generating_dead_recipe(ptr %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ [[TMP0:%.*]], %vector.body ] -; CHECK-NEXT: [[TMP0]] = add <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: [[TMP0]] = add <4 x i64> [[VEC_PHI]], splat (i64 -31364) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 360 ; CHECK-NEXT: br i1 [[TMP1]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index d574ec720e92a5..bbdff969ebddde 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -25,7 +25,7 @@ define i32 @reduction_sum(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7]] = add <4 x i32> [[TMP6]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -96,7 +96,7 @@ define i32 @reduction_prod(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 @@ -106,7 +106,7 @@ define i32 @reduction_prod(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7]] = mul <4 x i32> [[TMP6]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -187,7 +187,7 @@ define i32 @reduction_mix(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]] ; CHECK-NEXT: [[TMP7]] = add <4 x i32> [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -268,7 +268,7 @@ define i32 @reduction_mul(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP7]] = mul <4 x i32> [[TMP6]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -416,7 +416,7 @@ define i32 @reduction_and(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] @@ -760,16 +760,16 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> , <4 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]] ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 @@ -1059,7 +1059,7 @@ define i32 @reduction_sum_multiuse(i32 %n, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7]] = add <4 x i32> [[TMP6]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index d983c5138164fc..70d9290f41bbde 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -261,11 +261,11 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1) +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[TMP0]] @@ -278,7 +278,7 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[REVERSE2]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -330,11 +330,11 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1) +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], splat (i8 1) ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[TMP0]] @@ -347,7 +347,7 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[REVERSE2]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll index e7e63e55802fe1..bb515cd583e5bc 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll @@ -36,7 +36,7 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4 @@ -109,7 +109,7 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4 @@ -190,7 +190,7 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP10]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP16]], align 4 @@ -375,7 +375,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index 9521c0933fe876..ccf02b96abc9d2 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -32,7 +32,7 @@ define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], !dbg [[DBG9]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]], !dbg [[DBG9]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !dbg [[DBG9]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD]], , !dbg [[DBG9]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00), !dbg [[DBG9]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]], !dbg [[DBG9]] ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP4]], align 4, !dbg [[DBG9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG9]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll index 07e7b462e7bf98..da8e2add18a674 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll @@ -1460,7 +1460,7 @@ define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr noca ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll b/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll index beac46d2eb9d97..0eddc913702720 100644 --- a/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll @@ -16,7 +16,7 @@ define void @cond_call(ptr readonly %src, ptr noalias %dest, i64 %N) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_CALL_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[WIDE_LOAD]], splat (i64 5) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_CALL_IF:%.*]], label [[PRED_CALL_CONTINUE:%.*]] ; CHECK: pred.call.if: @@ -62,7 +62,7 @@ define void @cond_call(ptr readonly %src, ptr noalias %dest, i64 %N) { ; CHECK-NEXT: store i64 [[ST_VALUE]], ptr [[ST_ADDR]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[LOOPCOND]], label [[END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[LOOPCOND]], label [[END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll index 43eeefb7744900..37a998cb7e9387 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll @@ -43,7 +43,7 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP16]], align 8 ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP18]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll index 5b140d98d83c0b..c3931930a686ea 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll @@ -36,7 +36,7 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC1-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC1-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]] -; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-VF4-IC1-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI]], [[TMP5]] ; CHECK-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-VF4-IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -99,8 +99,8 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]] ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] -; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], -; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-VF4-IC2-NEXT: [[TMP12]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] ; CHECK-VF4-IC2-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] ; CHECK-VF4-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -260,7 +260,7 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC1-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC1-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]] -; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-VF4-IC1-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI]], [[TMP5]] ; CHECK-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-VF4-IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -323,8 +323,8 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD4]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]] ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] -; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], -; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-VF4-IC2-NEXT: [[TMP12]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] ; CHECK-VF4-IC2-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] ; CHECK-VF4-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -495,7 +495,7 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]] ; CHECK-VF4-IC1-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI2]], [[TMP4]] -; CHECK-VF4-IC1-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-VF4-IC1-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) ; CHECK-VF4-IC1-NEXT: [[TMP7]] = or <4 x i1> [[VEC_PHI]], [[TMP6]] ; CHECK-VF4-IC1-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-VF4-IC1-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] @@ -613,8 +613,8 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] ; CHECK-VF4-IC2-NEXT: [[TMP10]] = or <4 x i1> [[VEC_PHI4]], [[TMP8]] -; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], -; CHECK-VF4-IC2-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], +; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-VF4-IC2-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) ; CHECK-VF4-IC2-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI]], [[TMP11]] ; CHECK-VF4-IC2-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]] ; CHECK-VF4-IC2-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 @@ -905,7 +905,7 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no ; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-VF4-IC1-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer ; CHECK-VF4-IC1-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]] -; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-VF4-IC1-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-VF4-IC1-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI]], [[TMP5]] ; CHECK-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-VF4-IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -971,8 +971,8 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no ; CHECK-VF4-IC2-NEXT: [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer ; CHECK-VF4-IC2-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]] ; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]] -; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], -; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], +; CHECK-VF4-IC2-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-VF4-IC2-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; CHECK-VF4-IC2-NEXT: [[TMP12]] = or <4 x i1> [[VEC_PHI]], [[TMP10]] ; CHECK-VF4-IC2-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]] ; CHECK-VF4-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll index 65590a2963bc5e..155bb5c4ab0b11 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll @@ -6,7 +6,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF2IC1: vector.body: ; CHECK-VF2IC1: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue2 ] ; CHECK-VF2IC1: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr {{%.*}}, align 4 -; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], +; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], splat (i32 35) ; CHECK-VF2IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-VF2IC1-NEXT: br i1 [[TMP5]], label %pred.load.if, label %pred.load.continue ; CHECK-VF2IC1: pred.load.if: @@ -25,7 +25,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF2IC1-NEXT: br label %pred.load.continue2 ; CHECK-VF2IC1: pred.load.continue2: ; CHECK-VF2IC1-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %pred.load.continue ], [ [[TMP14]], %pred.load.if1 ] -; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = icmp eq <2 x i32> [[TMP15]], +; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = icmp eq <2 x i32> [[TMP15]], splat (i32 2) ; CHECK-VF2IC1-NEXT: [[TMP17:%.*]] = or <2 x i1> [[VEC_PHI]], [[TMP16]] ; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP17]], <2 x i1> [[VEC_PHI]] ; CHECK-VF2IC1: br i1 {{%.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index b3a24b472993be..7b66440a7fdcc2 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -7,8 +7,8 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -20,14 +20,14 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <4 x i1> [[VEC_ICMP1]], -; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <4 x i1> [[VEC_ICMP2]], -; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <4 x i1> [[VEC_ICMP3]], -; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <4 x i1> [[VEC_ICMP4]], +; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <4 x i32> {{.*}}, splat (i32 3) +; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <4 x i1> [[VEC_ICMP1]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <4 x i1> [[VEC_ICMP2]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <4 x i1> [[VEC_ICMP3]], splat (i1 true) +; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <4 x i1> [[VEC_ICMP4]], splat (i1 true) ; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <4 x i1> [[VEC_PHI1]], [[NOT1]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <4 x i1> [[VEC_PHI2]], [[NOT2]] ; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <4 x i1> [[VEC_PHI3]], [[NOT3]] @@ -93,7 +93,7 @@ define i32 @select_const_i32_from_icmp2(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], splat (i32 3) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[VEC_ICMP]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -127,8 +127,8 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -158,8 +158,8 @@ define i32 @select_const_i32_from_fcmp_fast(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> -; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp fast ueq <4 x float> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], +; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp fast ueq <4 x float> [[VEC_LOAD]], splat (float 3.000000e+00) +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -189,8 +189,8 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> -; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp ueq <4 x float> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], +; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp ueq <4 x float> [[VEC_LOAD]], splat (float 3.000000e+00) +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) @@ -221,10 +221,10 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 ; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-VF4IC1-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 -; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], +; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], splat (i32 3) ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], splat (i1 true) ; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll index f747993c54fc99..34fc0587c5d87b 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll @@ -11,22 +11,22 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%*]] = xor <2 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5]] = or <2 x i1> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 undef +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[A]], i64 undef ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -74,22 +74,22 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5]] = or <2 x i1> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 poison +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[A]], i64 poison ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -137,22 +137,22 @@ define i64 @pr62565_incoming_value_may_be_poison(i64 %a, ptr %src, i64 %start) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5]] = or <2 x i1> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 [[START]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[A]], i64 [[START]] ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index 087a0aa429b7ed..b7fc1e83ce23f4 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -29,8 +29,8 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP2]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], splat (i32 10) +; CHECK-NEXT: [[TMP2]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_PHI]], <4 x i32> splat (i32 10) ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -91,7 +91,7 @@ define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 12) ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_PHI]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index 6407583061e601..d99dcbb087d491 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -24,12 +24,12 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> , <2 x i16> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> splat (i16 1), <2 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <2 x i16> [[PREDPHI]], ptr [[TMP7]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -111,14 +111,14 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[WIDE_LOAD]] -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> , <2 x i16> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i32 0 ; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -208,9 +208,9 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <2 x i16> [[TMP9]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], -; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2) +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], splat (i16 2) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -303,14 +303,14 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[TMP14]] -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> , <2 x i16> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0 ; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP19]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -387,7 +387,7 @@ define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll index 54bbf3f22ec011..9efc8d12f44a39 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll @@ -11,7 +11,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 200) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -49,11 +49,11 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[exit:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 204, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] @@ -64,8 +64,8 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK-NEXT: store ptr [[TMP18]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 200 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[exit]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: [[exit]]: +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll index 11d0aac7429797..a757314ec7a46a 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll @@ -45,8 +45,8 @@ while.end: ; define void @reuse_const_btc(ptr %A) optsize { ; CHECK-LABEL: @reuse_const_btc -; CHECK: {{%.*}} = icmp ule <4 x i32> {{%.*}}, -; CHECK: {{%.*}} = select <4 x i1> {{%.*}}, <4 x i32> , <4 x i32> +; CHECK: {{%.*}} = icmp ule <4 x i32> {{%.*}}, splat (i32 13) +; CHECK: {{%.*}} = select <4 x i1> {{%.*}}, <4 x i32> splat (i32 12), <4 x i32> splat (i32 13) ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll index e4572a2b0a065a..dd2fd0de9938ef 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll @@ -14,8 +14,8 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] @@ -50,7 +50,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll index 1b646200e1c7c5..ec7f036ca27d3f 100644 --- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll +++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll @@ -25,7 +25,7 @@ define i64 @multi_exit_1_exit_count_with_udiv_by_value_in_header(ptr %dst, i64 % ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -91,7 +91,7 @@ define i64 @multi_exit_1_exit_count_with_udiv_by_constant_in_header(ptr %dst, i6 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -158,7 +158,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -270,7 +270,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 10) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -482,7 +482,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(ptr %dst, i64 %N ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -554,7 +554,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_call_before_loop ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP9]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -626,7 +626,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_loop_may_not_exe ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP9]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -698,7 +698,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP9]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -767,7 +767,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(ptr %dst, ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP8]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -835,7 +835,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_constant_in_latch(ptr %dst, i64 ; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[GEP]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP20:![0-9]+]] @@ -897,7 +897,7 @@ define void @single_exit_tc_with_udiv(ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP4]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] @@ -960,7 +960,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(ptr %dst, i64 %N ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP8]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] @@ -1027,7 +1027,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch(ptr %dst, i64 ; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[GEP]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP26:![0-9]+]] @@ -1177,7 +1177,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1(ptr %dst, i64 % ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -1290,7 +1290,7 @@ define i64 @multi_exit_count_with_udiv_by_value_in_latch_different_bounds_diviso ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[TMP7]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll index 048d18ea8422d0..b5bef4aacff3cf 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll @@ -17,7 +17,7 @@ define i32 @test_icmp_constant_op_zext(ptr %dst) { ; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i16 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: store <4 x i8> , ptr [[TMP2]], align 1 +; CHECK-NEXT: store <4 x i8> splat (i8 109), ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll index cae6ece9f9d8c9..14608d5068ed33 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll @@ -4,24 +4,24 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_pr47927_lshr_const_shift_ops -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_pr47927_lshr_const_shift_ops( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18) +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -64,24 +64,24 @@ exit: } define void @test_shl_const_shift_ops(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_shl_const_shift_ops -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_shl_const_shift_ops( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18) +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -124,24 +124,24 @@ exit: } define void @test_ashr_const_shift_ops(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_ashr_const_shift_ops -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_ashr_const_shift_ops( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18) +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -184,8 +184,8 @@ exit: } define void @test_shl_const_shifted_op(ptr %dst, i32 %f) { -; CHECK-LABEL: define void @test_shl_const_shifted_op -; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) { +; CHECK-LABEL: define void @test_shl_const_shifted_op( +; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -199,7 +199,7 @@ define void @test_shl_const_shifted_op(ptr %dst, i32 %f) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> splat (i32 19), [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -249,8 +249,8 @@ exit: define void @test_lshr_by_18(ptr %A) { -; CHECK-LABEL: define void @test_lshr_by_18 -; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-LABEL: define void @test_lshr_by_18( +; CHECK-SAME: ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -264,7 +264,7 @@ define void @test_lshr_by_18(ptr %A) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 18) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -313,8 +313,8 @@ exit: } define void @test_lshr_by_4(ptr %A) { -; CHECK-LABEL: define void @test_lshr_by_4 -; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-LABEL: define void @test_lshr_by_4( +; CHECK-SAME: ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -328,12 +328,12 @@ define void @test_lshr_by_4(ptr %A) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i16> [[TMP4]], splat (i16 4) ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i16> [[TMP5]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -375,19 +375,3 @@ loop: exit: ret void } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} -; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} -; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} -; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index c9fc8beb006d9b..85011d5f446c1d 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -153,7 +153,7 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: store i16 0, ptr [[TMP6]], align 2 ; CHECK-NEXT: store i16 0, ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index 4cee3e3cb6832e..5e8b60b910aed0 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -18,7 +18,7 @@ define void @ld_div1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -78,7 +78,7 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -133,7 +133,7 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -142,12 +142,12 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -200,7 +200,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -209,7 +209,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -217,7 +217,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -273,7 +273,7 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 @@ -333,7 +333,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -342,7 +342,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -350,7 +350,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -403,7 +403,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -412,7 +412,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -420,7 +420,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -473,7 +473,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -482,7 +482,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -490,7 +490,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -546,7 +546,7 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 @@ -608,7 +608,7 @@ define void @ld_div1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -664,7 +664,7 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -673,12 +673,12 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: @@ -730,7 +730,7 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -739,12 +739,12 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: @@ -798,7 +798,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -807,7 +807,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -815,7 +815,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: @@ -872,7 +872,7 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 @@ -933,7 +933,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -942,7 +942,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -950,7 +950,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: @@ -1004,7 +1004,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -1013,7 +1013,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -1021,7 +1021,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: @@ -1075,7 +1075,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -1084,7 +1084,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -1092,7 +1092,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: middle.block: @@ -1149,7 +1149,7 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 @@ -1203,14 +1203,14 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <2 x i32> [[VEC_IND]], [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = udiv <2 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = udiv <2 x i16> [[TMP4]], splat (i16 6) ; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP7]] @@ -1218,8 +1218,8 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store i16 [[TMP2]], ptr [[TMP10]], align 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll index 7f8b33e97360cc..4200ff96689384 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll @@ -18,7 +18,7 @@ define void @ld_and_neg1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -78,7 +78,7 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -133,7 +133,7 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -142,12 +142,12 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -200,7 +200,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -209,7 +209,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -217,7 +217,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -274,7 +274,7 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 @@ -334,7 +334,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -343,7 +343,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -351,7 +351,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -404,7 +404,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -413,7 +413,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -421,7 +421,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -473,7 +473,7 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -482,12 +482,12 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -541,7 +541,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -550,7 +550,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -558,7 +558,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: @@ -612,7 +612,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -621,7 +621,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -629,7 +629,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: @@ -683,7 +683,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -692,7 +692,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -700,7 +700,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index 098e29eb691661..c5a8e749997548 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -16,8 +16,8 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = urem <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP2:%.*]] = urem <8 x i64> [[TMP1]], splat (i64 3) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP2]], i32 1 @@ -50,12 +50,12 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x i64> [[TMP31]], i64 [[TMP24]], i32 5 ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i64> [[TMP32]], i64 [[TMP25]], i32 6 ; CHECK-NEXT: [[TMP34:%.*]] = insertelement <8 x i64> [[TMP33]], i64 [[TMP26]], i32 7 -; CHECK-NEXT: [[TMP35:%.*]] = add nsw <8 x i64> [[TMP34]], +; CHECK-NEXT: [[TMP35:%.*]] = add nsw <8 x i64> [[TMP34]], splat (i64 42) ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i64, ptr [[TMP36]], i32 0 ; CHECK-NEXT: store <8 x i64> [[TMP35]], ptr [[TMP37]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -108,9 +108,9 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP2:%.*]] = udiv <8 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = urem <8 x i64> [[TMP2]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = udiv <8 x i64> [[TMP1]], splat (i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = urem <8 x i64> [[TMP2]], splat (i64 3) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP3]], i32 1 @@ -143,12 +143,12 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i64> [[TMP32]], i64 [[TMP25]], i32 5 ; CHECK-NEXT: [[TMP34:%.*]] = insertelement <8 x i64> [[TMP33]], i64 [[TMP26]], i32 6 ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <8 x i64> [[TMP34]], i64 [[TMP27]], i32 7 -; CHECK-NEXT: [[TMP36:%.*]] = add nsw <8 x i64> [[TMP35]], +; CHECK-NEXT: [[TMP36:%.*]] = add nsw <8 x i64> [[TMP35]], splat (i64 42) ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i64, ptr [[TMP37]], i32 0 ; CHECK-NEXT: store <8 x i64> [[TMP36]], ptr [[TMP38]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -201,7 +201,7 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP1]], i32 1 @@ -234,12 +234,12 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i64> [[TMP30]], i64 [[TMP23]], i32 5 ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x i64> [[TMP31]], i64 [[TMP24]], i32 6 ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i64> [[TMP32]], i64 [[TMP25]], i32 7 -; CHECK-NEXT: [[TMP34:%.*]] = add nsw <8 x i64> [[TMP33]], +; CHECK-NEXT: [[TMP34:%.*]] = add nsw <8 x i64> [[TMP33]], splat (i64 42) ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[TMP35]], i32 0 ; CHECK-NEXT: store <8 x i64> [[TMP34]], ptr [[TMP36]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -296,7 +296,7 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: store <8 x i64> [[TMP5]], ptr [[TMP7]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll index f79772915b0248..17b94495b517ce 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll @@ -19,7 +19,7 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -58,7 +58,7 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], +; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -118,7 +118,7 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -154,7 +154,7 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) ; VF4-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0 ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i32 1 @@ -171,12 +171,12 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP11]], i32 1 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP12]], i32 2 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 3 -; VF4-NEXT: [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], +; VF4-NEXT: [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], splat (i64 42) ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP18]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: @@ -232,7 +232,7 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -272,7 +272,7 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0 ; VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], +; VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 8 @@ -338,7 +338,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -346,7 +346,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: middle.block: @@ -399,7 +399,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 -; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], +; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -413,7 +413,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: middle.block: @@ -469,7 +469,7 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 @@ -515,7 +515,7 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 -; VF4-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], +; VF4-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -590,7 +590,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -598,7 +598,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: middle.block: @@ -651,7 +651,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 -; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], +; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -665,7 +665,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: middle.block: @@ -718,7 +718,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 @@ -727,7 +727,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], +; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -735,7 +735,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP13]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF2: middle.block: @@ -771,7 +771,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) ; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1 @@ -788,7 +788,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 -; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], +; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -802,7 +802,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 ; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP25]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4: middle.block: @@ -855,7 +855,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -864,12 +864,12 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; VF2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 -; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], +; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) ; VF2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF2: middle.block: @@ -902,7 +902,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) ; VF4-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0 ; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i32 1 @@ -919,12 +919,12 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP11]], i32 1 ; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP12]], i32 2 ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 3 -; VF4-NEXT: [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], +; VF4-NEXT: [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], splat (i64 42) ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP18]], ptr [[TMP20]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF4: middle.block: @@ -981,7 +981,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; VF2-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], +; VF2-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 @@ -1028,7 +1028,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 -; VF4-NEXT: [[TMP8:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], +; VF4-NEXT: [[TMP8:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -1095,7 +1095,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -1104,7 +1104,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -1112,7 +1112,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF2: middle.block: @@ -1149,7 +1149,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 @@ -1166,7 +1166,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 -; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -1180,7 +1180,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF4: middle.block: @@ -1234,7 +1234,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 @@ -1243,7 +1243,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 @@ -1251,7 +1251,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF2: middle.block: @@ -1288,7 +1288,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 2) ; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 @@ -1305,7 +1305,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 -; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) ; VF4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -1319,7 +1319,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 ; VF4-NEXT: store i64 [[TMP30]], ptr [[TMP26]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF4: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index 098835afa4480a..4bc5aee381bc9a 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -15,8 +15,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -26,13 +26,13 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: middle.block: @@ -70,8 +70,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -89,13 +89,13 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: middle.block: @@ -163,7 +163,7 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; VF2-NEXT: [[TMP7:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], +; VF2-NEXT: [[TMP7:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42) ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP9]], align 8 @@ -205,8 +205,8 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -224,13 +224,13 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: @@ -291,8 +291,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -302,13 +302,13 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: @@ -346,8 +346,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -365,13 +365,13 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: @@ -434,8 +434,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -445,7 +445,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -453,8 +453,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: middle.block: @@ -496,8 +496,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -515,7 +515,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -529,8 +529,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: middle.block: @@ -593,8 +593,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -604,7 +604,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -612,8 +612,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF2: middle.block: @@ -655,8 +655,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -674,7 +674,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -688,8 +688,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF4: middle.block: @@ -752,8 +752,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -763,7 +763,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -771,8 +771,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: middle.block: @@ -814,8 +814,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -833,7 +833,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -847,8 +847,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: middle.block: @@ -911,8 +911,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -922,7 +922,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -930,8 +930,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF2: middle.block: @@ -973,8 +973,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -992,7 +992,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -1006,8 +1006,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4: middle.block: @@ -1070,8 +1070,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -1081,7 +1081,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -1089,8 +1089,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF2: middle.block: @@ -1132,8 +1132,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -1151,7 +1151,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -1165,8 +1165,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF4: middle.block: @@ -1229,8 +1229,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -1240,7 +1240,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 @@ -1248,8 +1248,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 ; VF2-NEXT: store i64 [[TMP17]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF2: middle.block: @@ -1291,8 +1291,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] ; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 ; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] @@ -1310,7 +1310,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) ; VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] @@ -1324,8 +1324,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 ; VF4-NEXT: store i64 [[TMP31]], ptr [[TMP27]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF4: middle.block: @@ -1387,8 +1387,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1398,13 +1398,13 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF2: middle.block: @@ -1443,8 +1443,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1462,13 +1462,13 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF4: middle.block: @@ -1530,8 +1530,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1541,13 +1541,13 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF2: middle.block: @@ -1586,8 +1586,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1605,13 +1605,13 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF4: middle.block: @@ -1673,8 +1673,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND2:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1684,13 +1684,13 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF2: middle.block: @@ -1729,8 +1729,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] ; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] @@ -1748,13 +1748,13 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 ; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 -; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) ; VF4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0 ; VF4-NEXT: store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; VF4-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF4: middle.block: @@ -1818,8 +1818,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -1829,7 +1829,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -1837,8 +1837,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF2: middle.block: @@ -1881,8 +1881,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -1900,7 +1900,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -1914,8 +1914,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF4: middle.block: @@ -1979,8 +1979,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -1990,7 +1990,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -1998,8 +1998,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF2: middle.block: @@ -2042,8 +2042,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2061,7 +2061,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2075,8 +2075,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF4: middle.block: @@ -2140,8 +2140,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -2151,7 +2151,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -2159,8 +2159,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF2: middle.block: @@ -2203,8 +2203,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2222,7 +2222,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2236,8 +2236,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF4: middle.block: @@ -2301,8 +2301,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -2312,7 +2312,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -2320,8 +2320,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; VF2: middle.block: @@ -2364,8 +2364,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2383,7 +2383,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2397,8 +2397,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; VF4: middle.block: @@ -2462,8 +2462,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -2473,7 +2473,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -2481,8 +2481,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; VF2: middle.block: @@ -2525,8 +2525,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2544,7 +2544,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2558,8 +2558,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; VF4: middle.block: @@ -2623,8 +2623,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3) ; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] @@ -2634,7 +2634,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], +; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 @@ -2642,8 +2642,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 ; VF2-NEXT: store i64 [[TMP18]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], +; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) +; VF2-NEXT: [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; VF2: middle.block: @@ -2686,8 +2686,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3) ; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] @@ -2705,7 +2705,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 ; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], +; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) ; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -2719,8 +2719,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP28]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) +; VF4-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 ; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; VF4: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll index d79b4a7cefc257..d7d7d5d9c5da0e 100644 --- a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll +++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll @@ -181,7 +181,7 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], ptr [[B]], ptr poison ; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[PREDPHI]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll index c9c1cd1c1817b8..e307d93b1896a8 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll @@ -20,7 +20,7 @@ define void @vector_gep_stored(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll b/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll index cb1ac49986f68c..76d3f6ca4c5adb 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll @@ -7,7 +7,7 @@ ; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i16, ptr %src, i32 [[IDX0]] ; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i16, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.+]] = load <4 x i16>, ptr [[GEP0]], align 2 -; CHECK-NEXT: [[FSHL:%.+]] = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD]], <4 x i16> ) +; CHECK-NEXT: [[FSHL:%.+]] = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD]], <4 x i16> splat (i16 15)) ; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i16, ptr %dst, i32 [[IDX0]] ; CHECK-NEXT: [[GEP1:%.+]] = getelementptr inbounds i16, ptr [[GEP0]], i32 0 ; CHECK-NEXT: store <4 x i16> [[FSHL]], ptr [[GEP1]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll index 5e65832aba8cc4..bb938045a54132 100644 --- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll @@ -365,7 +365,7 @@ define void @test_versioned_with_non_ex_use(i32 %offset, ptr noalias %dst.1, ptr ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i32 0 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP21]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -440,7 +440,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress { ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <4 x i16> , ptr [[TMP5]], align 2 +; CHECK-NEXT: store <4 x i16> splat (i16 1), ptr [[TMP5]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -506,7 +506,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: store <4 x i16> , ptr [[TMP7]], align 2 +; CHECK-NEXT: store <4 x i16> splat (i16 -1), ptr [[TMP7]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll index 1552af3140e383..7aa7293de9bbd6 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll @@ -22,26 +22,26 @@ define void @inner_loop_reduction(ptr noalias nocapture readonly %a.in, ptr noal ; CHECK-NEXT: %[[FOR1_INDEX:.*]] = phi i64 [ 0, %[[LABEL_PR:.*]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH:.*]] ] ; CHECK: %[[VEC_INDEX:.*]] = phi <4 x i64> [ , %[[LABEL_PR]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH]] ] ; CHECK-NEXT: %[[A_PTR:.*]] = getelementptr inbounds double, ptr %a.in, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[A_PTR]], i32 8, <4 x i1> , <4 x double> poison) +; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[A_PTR]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison) ; CHECK-NEXT: %[[B_PTR:.*]] = getelementptr inbounds double, ptr %b.in, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[B_PTR]], i32 8, <4 x i1> , <4 x double> poison) +; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[B_PTR]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison) ; CHECK-NEXT: br label %[[FOR2_HEADER:.*]] ; CHECK: [[FOR2_HEADER]]: ; CHECK-NEXT: %[[FOR2_INDEX:.*]] = phi <4 x i32> [ zeroinitializer, %vector.body ], [ %[[FOR2_INDEX_NEXT:.*]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[MASKED_GATHER1]], %vector.body ], [ %[[REDUCTION_NEXT:.*]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[REDUCTION_NEXT]] = fadd <4 x double> %[[MASKED_GATHER2]], %[[REDUCTION]] -; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], -; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], +; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], splat (i32 1) +; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], splat (i32 10000) ; CHECK-NEXT: %[[EXIT_COND:.*]] = extractelement <4 x i1> %[[VEC_PTR]], i32 0 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %[[FOR1_LATCH:.*]], label %{{.*}} ; CHECK: [[FOR1_LATCH]]: ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, ptr %c.out, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> splat (i1 true)) ; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 -; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], +; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], splat (i64 4) ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll index 4241409d3935fd..6783b9b1cba07e 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll @@ -11,9 +11,9 @@ define void @widen_call_instruction(ptr noalias nocapture readonly %a.in, ptr no ; CHECK-NEXT: %[[FOR1_INDEX:.*]] = phi i64 [ 0, %[[LABEL_PR:.*]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH:.*]] ] ; CHECK: %[[VEC_INDEX:.*]] = phi <4 x i64> [ , %[[LABEL_PR]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH]] ] ; CHECK-NEXT: %[[A_PTR:.*]] = getelementptr inbounds double, ptr %a.in, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[A_PTR]], i32 8, <4 x i1> , <4 x double> poison) +; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[A_PTR]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison) ; CHECK-NEXT: %[[B_PTR:.*]] = getelementptr inbounds double, ptr %b.in, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[B_PTR]], i32 8, <4 x i1> , <4 x double> poison) +; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %[[B_PTR]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison) ; CHECK-NEXT: %[[B_SQRT:.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %[[MASKED_GATHER2]]) ; CHECK-NEXT: br label %[[FOR2_HEADER:.*]] @@ -21,17 +21,17 @@ define void @widen_call_instruction(ptr noalias nocapture readonly %a.in, ptr no ; CHECK-NEXT: %[[FOR2_INDEX:.*]] = phi <4 x i32> [ zeroinitializer, %vector.body ], [ %[[FOR2_INDEX_NEXT:.*]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[MASKED_GATHER1]], %vector.body ], [ %[[REDUCTION_NEXT:.*]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[REDUCTION_NEXT]] = fadd <4 x double> %[[B_SQRT]], %[[REDUCTION]] -; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], -; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], +; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], splat (i32 1) +; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], splat (i32 10000) ; CHECK-NEXT: %[[EXIT_COND:.*]] = extractelement <4 x i1> %[[VEC_PTR]], i32 0 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %[[FOR1_LATCH:.*]], label %{{.*}} ; CHECK: [[FOR1_LATCH]]: ; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ] ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, ptr %c.out, <4 x i64> %[[VEC_INDEX]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %[[REDUCTION]], <4 x ptr> %[[C_PTR]], i32 8, <4 x i1> splat (i1 true)) ; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 -; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], +; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], splat (i64 4) ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll index 7b65d0257a1dc3..8fb68d7413d0e4 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll @@ -27,7 +27,7 @@ define void @loop_invariant_select(ptr noalias nocapture %out, i1 %select, doubl ; CHECK: for2.header1: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP2:%.*]], [[FOR2_HEADER1]] ] ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[SELECT:%.*]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP1]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP1]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true)) entry: br label %for1.header @@ -73,7 +73,7 @@ define void @outer_loop_dependant_select(ptr noalias nocapture %out, double %a, ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ] ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true)) entry: br label %for1.header @@ -120,7 +120,7 @@ define void @inner_loop_dependant_select(ptr noalias nocapture %out, double %a, ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP3:%.*]], [[FOR2_HEADER1]] ] ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_PHI]] to <4 x i1> ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP2]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true)) entry: br label %for1.header @@ -168,7 +168,7 @@ define void @outer_and_inner_loop_dependant_select(ptr noalias nocapture %out, d ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x double> [[BROADCAST_SPLAT]], <4 x double> [[BROADCAST_SPLAT3]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP3]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP3]], <4 x ptr> [[TMP0]], i32 8, <4 x i1> splat (i1 true)) entry: br label %for1.header diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll index eb87800daea99c..c203a2a27c8132 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll @@ -152,41 +152,41 @@ define <1 x i32> @dotproduct_i32_v8_constvector(<8 x i32> %a) { ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], +; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT1:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT2:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT1]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT2]], +; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT2]], splat (i32 2) ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT3]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul <1 x i32> [[SPLAT_SPLAT4]], +; CHECK-NEXT: [[TMP6:%.*]] = mul <1 x i32> [[SPLAT_SPLAT4]], splat (i32 3) ; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i32> [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 3 ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], +; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], splat (i32 4) ; CHECK-NEXT: [[TMP10:%.*]] = add <1 x i32> [[TMP7]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 4 ; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP11]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], +; CHECK-NEXT: [[TMP12:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], splat (i32 5) ; CHECK-NEXT: [[TMP13:%.*]] = add <1 x i32> [[TMP10]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 5 ; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], +; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], splat (i32 6) ; CHECK-NEXT: [[TMP16:%.*]] = add <1 x i32> [[TMP13]], [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 6 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP17]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], +; CHECK-NEXT: [[TMP18:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], splat (i32 7) ; CHECK-NEXT: [[TMP19:%.*]] = add <1 x i32> [[TMP16]], [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 7 ; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x i32> poison, i32 [[TMP20]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT13]], <1 x i32> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = mul <1 x i32> [[SPLAT_SPLAT14]], +; CHECK-NEXT: [[TMP21:%.*]] = mul <1 x i32> [[SPLAT_SPLAT14]], splat (i32 8) ; CHECK-NEXT: [[TMP22:%.*]] = add <1 x i32> [[TMP19]], [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x i32> [[TMP22]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP23]], <1 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll index 6bcc61a3c6291a..d49745909b0263 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll @@ -21,9 +21,9 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) { ; RM-NEXT: store <3 x double> [[TMP0]], ptr [[A_PTR]], align 8 ; RM-NEXT: [[VEC_GEP7:%.*]] = getelementptr double, ptr [[A_PTR]], i64 3 ; RM-NEXT: store <3 x double> [[TMP1]], ptr [[VEC_GEP7]], align 8 -; RM-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[COL_LOAD2]], -; RM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[COL_LOAD4]], -; RM-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[COL_LOAD6]], +; RM-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[COL_LOAD2]], splat (double 1.000000e+00) +; RM-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[COL_LOAD4]], splat (double 1.000000e+00) +; RM-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[COL_LOAD6]], splat (double 1.000000e+00) ; RM-NEXT: store <2 x double> [[TMP2]], ptr [[B_PTR]], align 8 ; RM-NEXT: [[VEC_GEP8:%.*]] = getelementptr double, ptr [[B_PTR]], i64 2 ; RM-NEXT: store <2 x double> [[TMP3]], ptr [[VEC_GEP8]], align 8 diff --git a/llvm/test/Transforms/MemCpyOpt/form-memset.ll b/llvm/test/Transforms/MemCpyOpt/form-memset.ll index 020a72183e9ea1..f8c4a6c6d11261 100644 --- a/llvm/test/Transforms/MemCpyOpt/form-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/form-memset.ll @@ -325,7 +325,7 @@ define void @test8() { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[STRUCT_TEST8:%.*]], align 16 -; CHECK-NEXT: store <4 x i32> , ptr [[MEMTMP]], align 16 +; CHECK-NEXT: store <4 x i32> splat (i32 -1), ptr [[MEMTMP]], align 16 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/NewGVN/completeness.ll b/llvm/test/Transforms/NewGVN/completeness.ll index 4841e2e958b281..17592ffaf5d432 100644 --- a/llvm/test/Transforms/NewGVN/completeness.ll +++ b/llvm/test/Transforms/NewGVN/completeness.ll @@ -119,8 +119,8 @@ define <2 x i32> @test3vec(i1 %which) { ; CHECK: delay: ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: -; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi <2 x i32> [ , [[ENTRY:%.*]] ], [ , [[DELAY]] ] -; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ , [[ENTRY]] ], [ , [[DELAY]] ] +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi <2 x i32> [ splat (i32 -877), [[ENTRY:%.*]] ], [ splat (i32 113), [[DELAY]] ] +; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ splat (i32 1000), [[ENTRY]] ], [ splat (i32 10), [[DELAY]] ] ; CHECK-NEXT: ret <2 x i32> [[PHIOFOPS]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll index 33bcab679ba91a..4a5ce1359d2571 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll @@ -54,10 +54,10 @@ define void @loop(ptr %X, ptr %Y) { ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD8]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD8]], -; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP5]], <2 x double> , <2 x double> [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x double> , <2 x double> [[WIDE_LOAD8]] +; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], splat (double 6.000000e+00) +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD8]], splat (double 6.000000e+00) +; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP5]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD8]] ; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP4]], <2 x double> zeroinitializer, <2 x double> [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]] @@ -152,8 +152,8 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META4:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META4]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], splat (i32 20) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META7:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index ec46ef959ce69e..5aee938f5191f2 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -113,8 +113,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP4]], i64 1 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP6]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], splat (i64 225) +; CHECK-NEXT: [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], splat (i64 225) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i64 1 @@ -189,8 +189,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18 ; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0 ; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1 -; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], -; CHECK-NEXT: [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], +; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], splat (i64 225) +; CHECK-NEXT: [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], splat (i64 225) ; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i1> [[TMP41]], i64 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP43]]) ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP41]], i64 1 @@ -266,8 +266,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33 ; CHECK-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0 ; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1 -; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], -; CHECK-NEXT: [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], +; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], splat (i64 225) +; CHECK-NEXT: [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], splat (i64 225) ; CHECK-NEXT: [[TMP74:%.*]] = extractelement <2 x i1> [[TMP72]], i64 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP74]]) ; CHECK-NEXT: [[TMP75:%.*]] = extractelement <2 x i1> [[TMP72]], i64 1 @@ -343,8 +343,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48 ; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0 ; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1 -; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], -; CHECK-NEXT: [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], +; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], splat (i64 225) +; CHECK-NEXT: [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], splat (i64 225) ; CHECK-NEXT: [[TMP105:%.*]] = extractelement <2 x i1> [[TMP103]], i64 0 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP105]]) ; CHECK-NEXT: [[TMP106:%.*]] = extractelement <2 x i1> [[TMP103]], i64 1 diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll index 6e9256afc7a8df..97d2247ab13c14 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll @@ -24,9 +24,9 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX1]], align 4 @@ -41,12 +41,12 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef ; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt <2 x double> [[TMP7]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[TMP7]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> ) -; CHECK-NEXT: [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> ) +; CHECK-NEXT: [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> splat (double -0.000000e+00)) +; CHECK-NEXT: [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> splat (double -0.000000e+00)) ; CHECK-NEXT: [[TMP14]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI16]], [[TMP12]] ; CHECK-NEXT: [[TMP15]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI17]], [[TMP13]] -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> -; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> splat (double -0.000000e+00) ; CHECK-NEXT: [[TMP18]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI]], [[TMP16]] ; CHECK-NEXT: [[TMP19]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI15]], [[TMP17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV1]], 4 @@ -210,9 +210,9 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI31:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI31:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI32:%.*]] = phi <2 x double> [ [[TMP27]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI33:%.*]] = phi <2 x double> [ , %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI33:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX_US1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_US1]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX_US1]], align 4 @@ -229,12 +229,12 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt <2 x double> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP10]] ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP11]] -; CHECK-NEXT: [[TMP16:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP10]], <2 x double> ) -; CHECK-NEXT: [[TMP17:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP11]], <2 x double> ) +; CHECK-NEXT: [[TMP16:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP10]], <2 x double> splat (double -0.000000e+00)) +; CHECK-NEXT: [[TMP17:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP11]], <2 x double> splat (double -0.000000e+00)) ; CHECK-NEXT: [[TMP18]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI32]], [[TMP16]] ; CHECK-NEXT: [[TMP19]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI33]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP12]], <2 x double> [[TMP14]], <2 x double> -; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP13]], <2 x double> [[TMP15]], <2 x double> +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP12]], <2 x double> [[TMP14]], <2 x double> splat (double -0.000000e+00) +; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP13]], <2 x double> [[TMP15]], <2 x double> splat (double -0.000000e+00) ; CHECK-NEXT: [[TMP22]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI]], [[TMP20]] ; CHECK-NEXT: [[TMP23]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI31]], [[TMP21]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV1]], 4 diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll index b53d0c211919b8..eba0ed2d35fe46 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll @@ -43,8 +43,8 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[TMP12:%.*]] = sub nsw <8 x i32> [[TMP7]], [[TMP2]] ; CHECK-NEXT: [[TMP13:%.*]] = mul <8 x i32> [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[TMP14:%.*]] = mul <8 x i32> [[TMP12]], [[TMP10]] -; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i32> [[TMP13]], -; CHECK-NEXT: [[TMP16:%.*]] = lshr <8 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP15:%.*]] = lshr <8 x i32> [[TMP13]], splat (i32 16) +; CHECK-NEXT: [[TMP16:%.*]] = lshr <8 x i32> [[TMP14]], splat (i32 16) ; CHECK-NEXT: [[TMP17:%.*]] = trunc nuw <8 x i32> [[TMP15]] to <8 x i16> ; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <8 x i32> [[TMP16]] to <8 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = sub <8 x i16> zeroinitializer, [[TMP17]] @@ -53,8 +53,8 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { ; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw <8 x i32> [[TMP7]], [[TMP2]] ; CHECK-NEXT: [[TMP23:%.*]] = mul <8 x i32> [[TMP21]], [[TMP9]] ; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i32> [[TMP22]], [[TMP10]] -; CHECK-NEXT: [[TMP25:%.*]] = lshr <8 x i32> [[TMP23]], -; CHECK-NEXT: [[TMP26:%.*]] = lshr <8 x i32> [[TMP24]], +; CHECK-NEXT: [[TMP25:%.*]] = lshr <8 x i32> [[TMP23]], splat (i32 16) +; CHECK-NEXT: [[TMP26:%.*]] = lshr <8 x i32> [[TMP24]], splat (i32 16) ; CHECK-NEXT: [[TMP27:%.*]] = trunc nuw <8 x i32> [[TMP25]] to <8 x i16> ; CHECK-NEXT: [[TMP28:%.*]] = trunc nuw <8 x i32> [[TMP26]] to <8 x i16> ; CHECK-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP27]], <8 x i16> [[TMP19]] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll index 2121775224098e..8d31a7d25ff3da 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll @@ -72,7 +72,7 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 ; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], +; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16) ; CHECK-NEXT: [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]] ; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> @@ -99,9 +99,9 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 ; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]] ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], -; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], -; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], +; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15) +; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537) +; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535) ; CHECK-NEXT: [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]] ; CHECK-NEXT: [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]] ; CHECK-NEXT: [[TMP74:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]]) diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll index 6e9abb3813aa1d..595b0f76a418be 100644 --- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll +++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll @@ -39,8 +39,8 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS ; CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i16>, ptr [[NEXT_GEP16]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD17]] to <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <8 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = ashr <8 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP6]], <8 x i32> ) +; CHECK-NEXT: [[TMP6:%.*]] = ashr <8 x i32> [[TMP5]], splat (i32 15) +; CHECK-NEXT: [[TMP7:%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP6]], <8 x i32> splat (i32 32767)) ; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> ; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr [[NEXT_GEP14]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll index 33a0eb43b70856..0db94bea46c402 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll @@ -62,8 +62,8 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] ; O2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]] -; O2-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], -; O2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], +; O2-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; O2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], splat (i32 1) ; O2-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]] ; O2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]] ; O2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -107,8 +107,8 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O3-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] ; O3-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]] -; O3-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], -; O3-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], +; O3-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) +; O3-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], splat (i32 1) ; O3-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]] ; O3-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]] ; O3-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll index 05e39efc7745ae..580c13d50b1f51 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll @@ -38,10 +38,10 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_LOAD4]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = or disjoint <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[TMP2]], splat (i32 65793) +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw <4 x i32> [[TMP3]], splat (i32 65793) +; CHECK-NEXT: [[TMP6:%.*]] = or disjoint <4 x i32> [[TMP4]], splat (i32 -16777216) +; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <4 x i32> [[TMP5]], splat (i32 -16777216) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[POUT:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll index be7906ebc0a8a2..a054d87997b1b5 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll @@ -58,22 +58,22 @@ define dso_local void @test(ptr %start, ptr %end) #0 { ; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; AVX2-NEXT: [[TMP8:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], -; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], -; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], -; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], -; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], -; AVX2-NEXT: [[TMP14:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], -; AVX2-NEXT: [[TMP15:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], +; AVX2-NEXT: [[TMP8:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], splat (i32 -12) +; AVX2-NEXT: [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], splat (i32 -12) +; AVX2-NEXT: [[TMP10:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], splat (i32 -12) +; AVX2-NEXT: [[TMP11:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], splat (i32 -12) +; AVX2-NEXT: [[TMP12:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], splat (i32 13) +; AVX2-NEXT: [[TMP13:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], splat (i32 13) +; AVX2-NEXT: [[TMP14:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], splat (i32 13) +; AVX2-NEXT: [[TMP15:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], splat (i32 13) ; AVX2-NEXT: [[TMP16:%.*]] = or <8 x i1> [[TMP8]], [[TMP12]] ; AVX2-NEXT: [[TMP17:%.*]] = or <8 x i1> [[TMP9]], [[TMP13]] ; AVX2-NEXT: [[TMP18:%.*]] = or <8 x i1> [[TMP10]], [[TMP14]] ; AVX2-NEXT: [[TMP19:%.*]] = or <8 x i1> [[TMP11]], [[TMP15]] -; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> , ptr [[NEXT_GEP]], i32 4, <8 x i1> [[TMP16]]) -; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> , ptr [[TMP5]], i32 4, <8 x i1> [[TMP17]]) -; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> , ptr [[TMP6]], i32 4, <8 x i1> [[TMP18]]) -; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> , ptr [[TMP7]], i32 4, <8 x i1> [[TMP19]]) +; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[NEXT_GEP]], i32 4, <8 x i1> [[TMP16]]) +; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP5]], i32 4, <8 x i1> [[TMP17]]) +; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP6]], i32 4, <8 x i1> [[TMP18]]) +; AVX2-NEXT: tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP7]], i32 4, <8 x i1> [[TMP19]]) ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; AVX2-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AVX2-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll index d9101272ba3d0f..549e4e036a747b 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll @@ -6,26 +6,26 @@ define void @trunc_through_one_add(ptr noalias %0, ptr noalias readonly %1) { ; SSE-LABEL: @trunc_through_one_add( ; SSE-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[TMP1:%.*]], align 1 ; SSE-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i16> -; SSE-NEXT: [[TMP5:%.*]] = lshr <8 x i16> [[TMP4]], +; SSE-NEXT: [[TMP5:%.*]] = lshr <8 x i16> [[TMP4]], splat (i16 1) ; SSE-NEXT: [[TMP6:%.*]] = add nuw nsw <8 x i16> [[TMP5]], [[TMP4]] -; SSE-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[TMP6]], +; SSE-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[TMP6]], splat (i16 2) ; SSE-NEXT: store <8 x i16> [[TMP7]], ptr [[TMP0:%.*]], align 2 ; SSE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 ; SSE-NEXT: [[TMP10:%.*]] = load <8 x i8>, ptr [[TMP8]], align 1 ; SSE-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[TMP10]] to <8 x i16> -; SSE-NEXT: [[TMP12:%.*]] = lshr <8 x i16> [[TMP11]], +; SSE-NEXT: [[TMP12:%.*]] = lshr <8 x i16> [[TMP11]], splat (i16 1) ; SSE-NEXT: [[TMP13:%.*]] = add nuw nsw <8 x i16> [[TMP12]], [[TMP11]] -; SSE-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], +; SSE-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], splat (i16 2) ; SSE-NEXT: store <8 x i16> [[TMP14]], ptr [[TMP9]], align 2 ; SSE-NEXT: ret void ; ; AVX-LABEL: @trunc_through_one_add( ; AVX-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 1 ; AVX-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[TMP3]] to <16 x i16> -; AVX-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], +; AVX-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], splat (i16 1) ; AVX-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i16> [[TMP5]], [[TMP4]] -; AVX-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP6]], +; AVX-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP6]], splat (i16 2) ; AVX-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP0:%.*]], align 2 ; AVX-NEXT: ret void ; @@ -181,9 +181,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr ; SSE-NEXT: [[TMP6:%.*]] = load <8 x i8>, ptr [[TMP2:%.*]], align 1 ; SSE-NEXT: [[TMP7:%.*]] = zext <8 x i8> [[TMP6]] to <8 x i16> ; SSE-NEXT: [[TMP8:%.*]] = add nuw nsw <8 x i16> [[TMP7]], [[TMP5]] -; SSE-NEXT: [[TMP9:%.*]] = lshr <8 x i16> [[TMP8]], +; SSE-NEXT: [[TMP9:%.*]] = lshr <8 x i16> [[TMP8]], splat (i16 1) ; SSE-NEXT: [[TMP10:%.*]] = add nuw nsw <8 x i16> [[TMP9]], [[TMP8]] -; SSE-NEXT: [[TMP11:%.*]] = lshr <8 x i16> [[TMP10]], +; SSE-NEXT: [[TMP11:%.*]] = lshr <8 x i16> [[TMP10]], splat (i16 2) ; SSE-NEXT: store <8 x i16> [[TMP11]], ptr [[TMP0:%.*]], align 2 ; SSE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 ; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8 @@ -193,9 +193,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr ; SSE-NEXT: [[TMP17:%.*]] = load <8 x i8>, ptr [[TMP13]], align 1 ; SSE-NEXT: [[TMP18:%.*]] = zext <8 x i8> [[TMP17]] to <8 x i16> ; SSE-NEXT: [[TMP19:%.*]] = add nuw nsw <8 x i16> [[TMP18]], [[TMP16]] -; SSE-NEXT: [[TMP20:%.*]] = lshr <8 x i16> [[TMP19]], +; SSE-NEXT: [[TMP20:%.*]] = lshr <8 x i16> [[TMP19]], splat (i16 1) ; SSE-NEXT: [[TMP21:%.*]] = add nuw nsw <8 x i16> [[TMP20]], [[TMP19]] -; SSE-NEXT: [[TMP22:%.*]] = lshr <8 x i16> [[TMP21]], +; SSE-NEXT: [[TMP22:%.*]] = lshr <8 x i16> [[TMP21]], splat (i16 2) ; SSE-NEXT: store <8 x i16> [[TMP22]], ptr [[TMP14]], align 2 ; SSE-NEXT: ret void ; @@ -205,9 +205,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr ; AVX-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[TMP2:%.*]], align 1 ; AVX-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i16> ; AVX-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i16> [[TMP7]], [[TMP5]] -; AVX-NEXT: [[TMP9:%.*]] = lshr <16 x i16> [[TMP8]], +; AVX-NEXT: [[TMP9:%.*]] = lshr <16 x i16> [[TMP8]], splat (i16 1) ; AVX-NEXT: [[TMP10:%.*]] = add nuw nsw <16 x i16> [[TMP9]], [[TMP8]] -; AVX-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], +; AVX-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], splat (i16 2) ; AVX-NEXT: store <16 x i16> [[TMP11]], ptr [[TMP0:%.*]], align 2 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll index 1c16b37144f1eb..fb7a4a3df74ef2 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll @@ -15,7 +15,7 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize { ; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP10]], align 4 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll index da2bd6f22df9f3..719d6df77cb7d5 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_32_add_16_shuffle_32_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; @@ -31,7 +31,7 @@ define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) { define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_32_add_8_shuffle_32_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; @@ -52,7 +52,7 @@ define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) { define <2 x i64> @shuffle_8_add_32_shuffle_8_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_8_add_32_shuffle_8_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll index 039e735985f9fc..930ce6a7b7d677 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_32_add_16_shuffle_32_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; @@ -31,7 +31,7 @@ define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) { define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_32_add_8_shuffle_32_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; @@ -52,7 +52,7 @@ define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) { define <2 x i64> @shuffle_8_add_32_shuffle_8_masks_are_eq(<2 x i64> %v) { ; CHECK-LABEL: @shuffle_8_add_32_shuffle_8_masks_are_eq( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: [[BC5:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[BC5]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll index 5bf7be4362a8e4..92ce8eb5b5ac23 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll @@ -41,8 +41,8 @@ define void @licm(ptr align 8 dereferenceable(8) %_M_start.i, i64 %numElem) { ; O23-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; O23-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[INDEX]] ; O23-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 -; O23-NEXT: store <2 x double> , ptr [[TMP1]], align 8, !tbaa [[TBAA8:![0-9]+]] -; O23-NEXT: store <2 x double> , ptr [[TMP2]], align 8, !tbaa [[TBAA8]] +; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP1]], align 8, !tbaa [[TBAA8:![0-9]+]] +; O23-NEXT: store <2 x double> splat (double 2.000000e+00), ptr [[TMP2]], align 8, !tbaa [[TBAA8]] ; O23-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; O23-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; O23-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll index 465b18ade1a7c3..993e949d6d2a0c 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll @@ -19,7 +19,7 @@ define void @vdiv(ptr %a, float %b) #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast <4 x float> , [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast <4 x float> splat (float 1.000000e+00), [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll index 87d3900765490f..ac4b56b0fd1b11 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -29,10 +29,10 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll index 841096d226f756..428147a1383b11 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll @@ -34,35 +34,35 @@ define noundef i64 @foo(i64 noundef %0) { define void @bar(ptr noundef %0) { ; SSE-LABEL: @bar( ; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[TMP0:%.*]], align 8 -; SSE-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], -; SSE-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], +; SSE-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], splat (i64 -1) +; SSE-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 44) ; SSE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP0]], align 8 ; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 ; SSE-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; SSE-NEXT: [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], -; SSE-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], +; SSE-NEXT: [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], splat (i64 -1) +; SSE-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], splat (i64 44) ; SSE-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP5]], align 8 ; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32 ; SSE-NEXT: [[TMP10:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8 -; SSE-NEXT: [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], -; SSE-NEXT: [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], +; SSE-NEXT: [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], splat (i64 -1) +; SSE-NEXT: [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], splat (i64 44) ; SSE-NEXT: store <2 x i64> [[TMP12]], ptr [[TMP9]], align 8 ; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48 ; SSE-NEXT: [[TMP14:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8 -; SSE-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], -; SSE-NEXT: [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], +; SSE-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], splat (i64 -1) +; SSE-NEXT: [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], splat (i64 44) ; SSE-NEXT: store <2 x i64> [[TMP16]], ptr [[TMP13]], align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @bar( ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8 -; AVX-NEXT: [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], -; AVX-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], +; AVX-NEXT: [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], splat (i64 -1) +; AVX-NEXT: [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 44) ; AVX-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP0]], align 8 ; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32 ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 -; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], -; AVX-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], +; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], splat (i64 -1) +; AVX-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], splat (i64 44) ; AVX-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP5]], align 8 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index d2850f36a80dc2..035f3f145bf8d3 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -11,7 +11,7 @@ define float @test_merge_allof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP0:%.*]] = fcmp uge <4 x float> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]] @@ -168,7 +168,7 @@ define float @test_separate_allof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP0:%.*]] = fcmp uge <4 x float> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]] @@ -250,7 +250,7 @@ define float @test_separate_anyof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[T_FR]], +; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[T_FR]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; CHECK-NEXT: [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[DOTNOT]], [[DOTNOT6]] @@ -332,7 +332,7 @@ define float @test_merge_allof_v4si(<4 x i32> %t) { ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <4 x i32> [[T_FR]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], splat (i32 256) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]] @@ -479,7 +479,7 @@ define i32 @test_separate_allof_v4si(<4 x i32> %t) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], splat (i32 256) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> @@ -552,12 +552,12 @@ define i32 @test_separate_anyof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_separate_anyof_v4si( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], splat (i32 1) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[RETURN:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[T_FR]], splat (i32 255) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; CHECK-NEXT: [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index c169bc000644ed..254136b0b841a9 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -280,11 +280,11 @@ define i1 @cmp_lt_gt(double %a, double %b, double %c) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], splat (double 0x3EB0C6F7A0B5ED8D) ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i1> [[TMP8]], [[SHIFT]] ; CHECK-NEXT: [[OR_COND:%.*]] = extractelement <2 x i1> [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], splat (double 1.000000e+00) ; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <2 x i1> [[TMP10]], <2 x i1> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i1> [[TMP10]], [[SHIFT2]] ; CHECK-NEXT: [[OR_COND1_NOT:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0 diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll index c966c3e7de0a24..1afd8950b02ff1 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-load-store.ll @@ -30,7 +30,7 @@ define <2 x i64> @vpload_v2i64_allones_mask(ptr %ptr, i32 zeroext %evl) { ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[PTR:%.*]], i32 1, <2 x i1> [[TMP2]], <2 x i64> poison) ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -74,7 +74,7 @@ define void @vpstore_v2i64_allones_mask(<2 x i64> %val, ptr %ptr, i32 zeroext %e ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> , [[DOTSPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], splat (i1 true) ; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <2 x i1> [[TMP2]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll index 92b36054356b20..6eaf98f893bfab 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll @@ -182,7 +182,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> , [[NSPLAT]] ; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m -; ALL-CONVERT-NEXT: [[SELONE:%.+]] = select <8 x i1> [[NEWM]], <8 x i32> %i1, <8 x i32> +; ALL-CONVERT-NEXT: [[SELONE:%.+]] = select <8 x i1> [[NEWM]], <8 x i32> %i1, <8 x i32> splat (i32 1) ; ALL-CONVERT-NEXT: %{{.+}} = sdiv <8 x i32> %i0, [[SELONE]] ; ALL-CONVERT-NOT: %{{.+}} = srem <8 x i32> %i0, %i1 ; ALL-CONVERT: %{{.+}} = srem <8 x i32> %i0, %{{.+}} @@ -212,10 +212,10 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; ALL-CONVERT-NEXT: [[ADD:%.+]] = select <4 x i1> [[NEWM]], <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[ADD]]) ; ALL-CONVERT-NEXT: %{{.+}} = add i32 [[RED]], %start -; ALL-CONVERT: [[MUL:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[MUL:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 1) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[MUL]]) ; ALL-CONVERT-NEXT: %{{.+}} = mul i32 [[RED]], %start -; ALL-CONVERT: [[AND:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[AND:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 -1) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[AND]]) ; ALL-CONVERT-NEXT: %{{.+}} = and i32 [[RED]], %start ; ALL-CONVERT: [[OR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer @@ -224,13 +224,13 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; ALL-CONVERT: [[XOR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[XOR]]) ; ALL-CONVERT-NEXT: %{{.+}} = xor i32 [[RED]], %start -; ALL-CONVERT: [[SMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[SMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 2147483647) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smin.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT: [[SMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[SMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 -2147483648) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smax.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT: [[UMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[UMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> splat (i32 -1) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[UMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umin.i32(i32 [[RED]], i32 %start) ; ALL-CONVERT: [[UMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer @@ -244,52 +244,52 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] ; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m -; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> [[NEWM]], <4 x float> %vf, <4 x float> +; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> [[NEWM]], <4 x float> %vf, <4 x float> splat (float 0x7FF8000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMIN_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMIN_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x7FF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x47EFFFFFE0000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xFFF8000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAX_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xFFF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xC7EFFFFFE0000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMINIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMINIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x7FF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minimum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMINIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMINIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x7FF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minimum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMINIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMINIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0x47EFFFFFE0000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minimum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAXIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAXIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xFFF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maximum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAXIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAXIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xFFF0000000000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maximum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FMAXIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAXIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 0xC7EFFFFFE0000000) ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maximum.f32(float [[RED]], float %f) -; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float -0.000000e+00) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]]) -; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float -0.000000e+00) ; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]]) -; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 1.000000e+00) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]]) -; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> splat (float 1.000000e+00) ; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]]) ; ALL-CONVERT-NEXT: ret void diff --git a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll index 0b88a65a793e92..617d513f712681 100644 --- a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll +++ b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll @@ -103,8 +103,8 @@ define <2 x double> @test3_reassoc(<2 x double> %a, <2 x double> %b, <2 x double define <2 x float> @test4(<2 x float> %A) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[X:%.*]] = fadd <2 x float> [[A:%.*]], -; CHECK-NEXT: [[Y:%.*]] = fadd <2 x float> [[A]], +; CHECK-NEXT: [[X:%.*]] = fadd <2 x float> [[A:%.*]], splat (float 1.000000e+00) +; CHECK-NEXT: [[Y:%.*]] = fadd <2 x float> [[A]], splat (float 1.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fsub <2 x float> [[X]], [[Y]] ; CHECK-NEXT: ret <2 x float> [[R]] ; @@ -118,7 +118,7 @@ define <2 x float> @test4(<2 x float> %A) { define <2 x float> @test5(<2 x float> %X) { ; CHECK-LABEL: @test5( -; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], +; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 9.400000e+01) ; CHECK-NEXT: ret <2 x float> [[FACTOR]] ; %Y = fmul fast <2 x float> %X, @@ -130,7 +130,7 @@ define <2 x float> @test5(<2 x float> %X) { define <2 x float> @test5_reassoc(<2 x float> %X) { ; CHECK-LABEL: @test5_reassoc( -; CHECK-NEXT: [[Y:%.*]] = fmul reassoc <2 x float> [[X:%.*]], +; CHECK-NEXT: [[Y:%.*]] = fmul reassoc <2 x float> [[X:%.*]], splat (float 4.700000e+01) ; CHECK-NEXT: [[Z:%.*]] = fadd reassoc <2 x float> [[Y]], [[Y]] ; CHECK-NEXT: ret <2 x float> [[Z]] ; @@ -143,7 +143,7 @@ define <2 x float> @test5_reassoc(<2 x float> %X) { define <2 x float> @test6(<2 x float> %X) { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], +; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 3.000000e+00) ; CHECK-NEXT: ret <2 x float> [[FACTOR]] ; %Y = fadd fast <2 x float> %X ,%X @@ -168,7 +168,7 @@ define <2 x float> @test6_reassoc(<2 x float> %X) { define <2 x double> @test7(<2 x double> %W) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[REASS_MUL:%.*]] = fmul fast <2 x double> [[W:%.*]], +; CHECK-NEXT: [[REASS_MUL:%.*]] = fmul fast <2 x double> [[W:%.*]], splat (double 1.770000e+02) ; CHECK-NEXT: ret <2 x double> [[REASS_MUL]] ; %X = fmul fast <2 x double> %W, @@ -181,8 +181,8 @@ define <2 x double> @test7(<2 x double> %W) { define <2 x double> @test7_reassoc(<2 x double> %W) { ; CHECK-LABEL: @test7_reassoc( -; CHECK-NEXT: [[X:%.*]] = fmul reassoc <2 x double> [[W:%.*]], -; CHECK-NEXT: [[Y:%.*]] = fmul reassoc <2 x double> [[W]], +; CHECK-NEXT: [[X:%.*]] = fmul reassoc <2 x double> [[W:%.*]], splat (double 1.270000e+02) +; CHECK-NEXT: [[Y:%.*]] = fmul reassoc <2 x double> [[W]], splat (double 5.000000e+01) ; CHECK-NEXT: [[Z:%.*]] = fadd reassoc <2 x double> [[Y]], [[X]] ; CHECK-NEXT: ret <2 x double> [[Z]] ; @@ -196,7 +196,7 @@ define <2 x double> @test7_reassoc(<2 x double> %W) { define <2 x float> @test8(<2 x float> %arg) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[ARG:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[ARG:%.*]], splat (float 1.440000e+02) ; CHECK-NEXT: ret <2 x float> [[TMP2]] ; %tmp1 = fmul fast <2 x float> , %arg @@ -208,8 +208,8 @@ define <2 x float> @test8(<2 x float> %arg) { define <2 x float> @test8_reassoc(<2 x float> %arg) { ; CHECK-LABEL: @test8_reassoc( -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[ARG:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[ARG:%.*]], splat (float 1.200000e+01) +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <2 x float> [[TMP1]], splat (float 1.200000e+01) ; CHECK-NEXT: ret <2 x float> [[TMP2]] ; %tmp1 = fmul reassoc <2 x float> , %arg @@ -222,7 +222,7 @@ define <2 x float> @test8_reassoc(<2 x float> %arg) { define <2 x double> @test9(<2 x double> %b, <2 x double> %a) { ; CHECK-LABEL: @test9( ; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x double> zeroinitializer, [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], splat (double 1.234000e+03) ; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = fadd fast <2 x double> %a, @@ -235,7 +235,7 @@ define <2 x double> @test9(<2 x double> %b, <2 x double> %a) { define <2 x double> @test9_unary_fneg(<2 x double> %b, <2 x double> %a) { ; CHECK-LABEL: @test9_unary_fneg( ; CHECK-NEXT: [[TMP1:%.*]] = fneg fast <2 x double> [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], splat (double 1.234000e+03) ; CHECK-NEXT: ret <2 x double> [[TMP2]] ; %1 = fadd fast <2 x double> %a, @@ -249,7 +249,7 @@ define <2 x double> @test9_unary_fneg(<2 x double> %b, <2 x double> %a) { define <2 x double> @test9_reassoc(<2 x double> %b, <2 x double> %a) { ; CHECK-LABEL: @test9_reassoc( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], splat (double 1.234000e+03) ; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <2 x double> [[B:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc <2 x double> zeroinitializer, [[A]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd reassoc <2 x double> [[TMP3]], [[TMP2]] @@ -264,7 +264,7 @@ define <2 x double> @test9_reassoc(<2 x double> %b, <2 x double> %a) { define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a) { ; CHECK-LABEL: @test9_reassoc_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], splat (double 1.234000e+03) ; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <2 x double> [[B:%.*]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = fneg reassoc <2 x double> [[A]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd reassoc <2 x double> [[TMP3]], [[TMP2]] @@ -281,7 +281,7 @@ define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a) define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], splat (float 4.000000e+01) ; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[C]], [[Z:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer ; CHECK-NEXT: ret <2 x float> [[TMP1]] @@ -295,7 +295,7 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) { define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10_unary_fneg( -; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], +; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], splat (float 4.000000e+01) ; CHECK-NEXT: [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x float> [[F]] ; @@ -310,7 +310,7 @@ define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float define <2 x float> @test10_reassoc(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10_reassoc( -; CHECK-NEXT: [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], +; CHECK-NEXT: [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], splat (float 4.000000e+01) ; CHECK-NEXT: [[C:%.*]] = fsub reassoc <2 x float> zeroinitializer, [[D]] ; CHECK-NEXT: [[E:%.*]] = fmul reassoc <2 x float> [[A:%.*]], [[C]] ; CHECK-NEXT: [[F:%.*]] = fsub reassoc <2 x float> zeroinitializer, [[E]] @@ -325,7 +325,7 @@ define <2 x float> @test10_reassoc(<2 x float> %a, <2 x float> %b, <2 x float> % define <2 x float> @test10_reassoc_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10_reassoc_unary_fneg( -; CHECK-NEXT: [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], +; CHECK-NEXT: [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], splat (float 4.000000e+01) ; CHECK-NEXT: [[C:%.*]] = fneg reassoc <2 x float> [[D]] ; CHECK-NEXT: [[E:%.*]] = fmul reassoc <2 x float> [[A:%.*]], [[C]] ; CHECK-NEXT: [[F:%.*]] = fneg reassoc <2 x float> [[E]] @@ -343,7 +343,7 @@ define <2 x float> @test10_reassoc_unary_fneg(<2 x float> %a, <2 x float> %b, <2 define <2 x double> @test11(<2 x double> %x, <2 x double> %y) { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast <2 x double> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[REASS_MUL:%.*]] = fmul fast <2 x double> [[FACTOR]], +; CHECK-NEXT: [[REASS_MUL:%.*]] = fmul fast <2 x double> [[FACTOR]], splat (double 2.000000e+00) ; CHECK-NEXT: ret <2 x double> [[REASS_MUL]] ; %1 = fmul fast <2 x double> %x, %y @@ -372,7 +372,7 @@ define <2 x double> @test11_reassoc(<2 x double> %x, <2 x double> %y) { define <2 x i64> @test12(<2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: @test12( ; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[C:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> [[MUL]], +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> [[MUL]], splat (i64 5) ; CHECK-NEXT: ret <2 x i64> [[SHL]] ; %mul = mul <2 x i64> %c, %b @@ -384,7 +384,7 @@ define <2 x i64> @test12(<2 x i64> %b, <2 x i64> %c) { define <4 x float> @test13(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test13( -; CHECK-NEXT: [[MUL:%.*]] = fmul fast <4 x float> [[B:%.*]], +; CHECK-NEXT: [[MUL:%.*]] = fmul fast <4 x float> [[B:%.*]], splat (float 5.000000e+00) ; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <4 x float> [[A:%.*]], [[MUL]] ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; diff --git a/llvm/test/Transforms/Reassociate/negation.ll b/llvm/test/Transforms/Reassociate/negation.ll index 14ae86fb94aaba..81e888a9476888 100644 --- a/llvm/test/Transforms/Reassociate/negation.ll +++ b/llvm/test/Transforms/Reassociate/negation.ll @@ -33,7 +33,7 @@ define i32 @test2(i32 %a, i32 %b, i32 %z) { define <2 x i32> @negate_vec_poisons(<2 x i32> %a, <2 x i32> %b, <2 x i32> %z) { ; CHECK-LABEL: @negate_vec_poisons( -; CHECK-NEXT: [[E:%.*]] = mul <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[E:%.*]] = mul <2 x i32> [[A:%.*]], splat (i32 40) ; CHECK-NEXT: [[F:%.*]] = mul <2 x i32> [[E]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x i32> [[F]] ; @@ -51,7 +51,7 @@ define <2 x i32> @PR57683(<2 x i32> %x) { ; CHECK-NEXT: [[PARTIAL_NEG:%.*]] = sub <2 x i32> , [[X:%.*]] ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[PARTIAL_NEG]], <2 x i32> [[X]], <2 x i32> ; CHECK-NEXT: [[X_NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X]] -; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[X_NEG]], +; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[X_NEG]], splat (i32 1) ; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[SUB]], [[SHUF]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; @@ -67,7 +67,7 @@ define <2 x float> @PR57683_FP(<2 x float> %x) { ; CHECK-NEXT: [[PARTIAL_NEG:%.*]] = fsub reassoc nsz <2 x float> , [[X:%.*]] ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[PARTIAL_NEG]], <2 x float> [[X]], <2 x i32> ; CHECK-NEXT: [[X_NEG:%.*]] = fneg reassoc nsz <2 x float> [[X]] -; CHECK-NEXT: [[SUB:%.*]] = fadd reassoc nsz <2 x float> [[X_NEG]], +; CHECK-NEXT: [[SUB:%.*]] = fadd reassoc nsz <2 x float> [[X_NEG]], splat (float 1.000000e+00) ; CHECK-NEXT: [[R:%.*]] = fadd reassoc nsz <2 x float> [[SUB]], [[SHUF]] ; CHECK-NEXT: ret <2 x float> [[R]] ; diff --git a/llvm/test/Transforms/Reassociate/xor_reassoc.ll b/llvm/test/Transforms/Reassociate/xor_reassoc.ll index 97efb8f4e59415..729535a6fa0966 100644 --- a/llvm/test/Transforms/Reassociate/xor_reassoc.ll +++ b/llvm/test/Transforms/Reassociate/xor_reassoc.ll @@ -25,8 +25,8 @@ define i32 @xor1(i32 %x) { ; define <2 x i32> @xor1_vec(<2 x i32> %x) { ; CHECK-LABEL: @xor1_vec( -; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[AND_RA]], +; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 435) +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[AND_RA]], splat (i32 435) ; CHECK-NEXT: ret <2 x i32> [[XOR]] ; %or = or <2 x i32> %x, @@ -54,7 +54,7 @@ define i32 @xor2(i32 %x, i32 %y) { ; Real testing case : (x & 123) ^ y ^ (x & 345) => (x & 435) ^ y define <2 x i32> @xor2_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor2_vec( -; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 435) ; CHECK-NEXT: [[XOR2:%.*]] = xor <2 x i32> [[AND_RA]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[XOR2]] ; @@ -85,8 +85,8 @@ define i32 @xor3(i32 %x, i32 %y) { ; c3 = ~c1 ^ c2 define <2 x i32> @xor3_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor3_vec( -; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -436) +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 123) ; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[XOR]], [[AND_RA]] ; CHECK-NEXT: ret <2 x i32> [[XOR1]] ; @@ -114,8 +114,8 @@ define i32 @xor4(i32 %x, i32 %y) { ; Test rule: (x | c1) ^ c2 = (x & ~c1) ^ (c1 ^ c2) define <2 x i32> @xor4_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor4_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -124) +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 435) ; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[XOR]], [[AND]] ; CHECK-NEXT: ret <2 x i32> [[XOR1]] ; @@ -149,7 +149,7 @@ define i32 @xor_special1(i32 %x, i32 %y) { ; (x | c1) ^ (x & ~c1) = c1 define <2 x i32> @xor_special1_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor_special1_vec( -; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[Y:%.*]], +; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 123) ; CHECK-NEXT: ret <2 x i32> [[XOR1]] ; %or = or <2 x i32> %x, @@ -178,7 +178,7 @@ define i32 @xor_special2(i32 %x, i32 %y) { ; (x | c1) ^ (x & c1) = x ^ c1 define <2 x i32> @xor_special2_vec(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @xor_special2_vec( -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 123) ; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i32> [[XOR]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[XOR1]] ; diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll b/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll index 28600b22198925..caf4d5274bed96 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll @@ -7,16 +7,16 @@ define void @test_vector_clobber(ptr addrspace(1) %ptr) gc "statepoint-example" ; CHECK-NEXT: entry: ; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(void ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "deopt"(i32 0, i32 2, i32 0, i32 62, i32 0, i32 13, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 14, i32 3, i32 -2406, i32 3, i32 28963, i32 3, i32 30401, i32 3, i32 -11, i32 3, i32 -5, i32 3, i32 1, i32 0, ptr addrspace(1) [[PTR:%.*]], i32 0, ptr addrspace(1) [[PTR]], i32 7, ptr null), "gc-live"(ptr addrspace(1) [[PTR]]) ] ; CHECK-NEXT: [[PTR_RELOCATED:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 0, i32 0) -; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <8 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[PTR_RELOCATED]], i64 0, !is_base_value !0 +; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <8 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[PTR_RELOCATED]], i64 0, !is_base_value [[META0:![0-9]+]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x ptr addrspace(1)> poison, ptr addrspace(1) [[PTR_RELOCATED]], i64 0 -; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <8 x ptr addrspace(1)> [[DOTSPLATINSERT_BASE]], <8 x ptr addrspace(1)> poison, <8 x i32> zeroinitializer, !is_base_value !0 +; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <8 x ptr addrspace(1)> [[DOTSPLATINSERT_BASE]], <8 x ptr addrspace(1)> poison, <8 x i32> zeroinitializer, !is_base_value [[META0]] ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x ptr addrspace(1)> [[DOTSPLATINSERT]], <8 x ptr addrspace(1)> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, <8 x ptr addrspace(1)> [[DOTSPLAT]], <8 x i64> ; CHECK-NEXT: [[STATEPOINT_TOKEN1:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(void ()) @bar, i32 0, i32 0, i32 0, i32 0) [ "deopt"(i32 0, i32 1, i32 0, i32 112, i32 0, i32 13, i32 0, i32 7, ptr null, i32 7, ptr null, i32 3, i32 poison, i32 3, i32 14, i32 3, i32 -2406, i32 3, i32 28963, i32 3, i32 30401, i32 3, i32 -11, i32 3, i32 -5, i32 3, i32 1, i32 0, ptr addrspace(1) [[PTR_RELOCATED]], i32 0, ptr addrspace(1) [[PTR_RELOCATED]], i32 7, ptr null), "gc-live"(<8 x ptr addrspace(1)> [[GEP]], ptr addrspace(1) [[PTR_RELOCATED]], <8 x ptr addrspace(1)> [[DOTSPLAT_BASE]]) ] ; CHECK-NEXT: [[GEP_RELOCATED:%.*]] = call coldcc <8 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v8p1(token [[STATEPOINT_TOKEN1]], i32 2, i32 0) ; CHECK-NEXT: [[PTR_RELOCATED2:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN1]], i32 1, i32 1) ; CHECK-NEXT: [[DOTSPLAT_BASE_RELOCATED:%.*]] = call coldcc <8 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v8p1(token [[STATEPOINT_TOKEN1]], i32 2, i32 2) -; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p1(<8 x ptr addrspace(1)> [[GEP_RELOCATED]], i32 4, <8 x i1> , <8 x float> poison) +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p1(<8 x ptr addrspace(1)> [[GEP_RELOCATED]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison) ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll index 1f2fbb6f53cdd8..7cae2d93e03152 100644 --- a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll +++ b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll @@ -55,8 +55,8 @@ entry: define <4 x i8> @range_from_lshr_vec_2(<4 x i8> %a) { ; CHECK-LABEL: @range_from_lshr_vec_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], -; CHECK-NEXT: [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], +; CHECK-NEXT: [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], splat (i8 1) +; CHECK-NEXT: [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], splat (i8 2) ; CHECK-NEXT: ret <4 x i8> [[ADD_1]] ; entry: @@ -196,7 +196,7 @@ entry: define internal <4 x i8> @test_propagate_argument(<4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: @test_propagate_argument( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD:%.*]] = add <4 x i8> [[A:%.*]], +; CHECK-NEXT: [[ADD:%.*]] = add <4 x i8> [[A:%.*]], splat (i8 3) ; CHECK-NEXT: ret <4 x i8> [[ADD]] ; entry: @@ -206,7 +206,7 @@ entry: define <4 x i8> @test_propagate_caller(<4 x i8> %a) { ; CHECK-LABEL: @test_propagate_caller( -; CHECK-NEXT: [[RES_1:%.*]] = call <4 x i8> @test_propagate_argument(<4 x i8> [[A:%.*]], <4 x i8> ) +; CHECK-NEXT: [[RES_1:%.*]] = call <4 x i8> @test_propagate_argument(<4 x i8> [[A:%.*]], <4 x i8> splat (i8 3)) ; CHECK-NEXT: ret <4 x i8> [[RES_1]] ; %add = add <4 x i8> , diff --git a/llvm/test/Transforms/SCCP/intrinsics.ll b/llvm/test/Transforms/SCCP/intrinsics.ll index 5edb31738e685b..27ada6339ea06c 100644 --- a/llvm/test/Transforms/SCCP/intrinsics.ll +++ b/llvm/test/Transforms/SCCP/intrinsics.ll @@ -127,7 +127,7 @@ define <4 x i32> @pr63380(<4 x i32> %input) { ; CHECK-LABEL: @pr63380( ; CHECK-NEXT: [[CTLZ_1:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[INPUT:%.*]], i1 false) ; CHECK-NEXT: [[CTLZ_2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[CTLZ_1]], i1 true) -; CHECK-NEXT: ret <4 x i32> +; CHECK-NEXT: ret <4 x i32> splat (i32 27) ; %ctlz.1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %input, i1 false) %ctlz.2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %ctlz.1, i1 true) diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll index d9dc0459b5ced6..8c15b0820de255 100644 --- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll @@ -310,7 +310,7 @@ define i16 @vector_binop_and_cast() { ; CHECK-LABEL: define i16 @vector_binop_and_cast() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x i16> , i16 undef, i32 0 -; CHECK-NEXT: [[REM:%.*]] = srem <8 x i16> , [[VECINIT7]] +; CHECK-NEXT: [[REM:%.*]] = srem <8 x i16> splat (i16 2), [[VECINIT7]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[REM]] to i128 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i16 ; CHECK-NEXT: ret i16 [[TMP1]] diff --git a/llvm/test/Transforms/SCCP/overdefined-ext.ll b/llvm/test/Transforms/SCCP/overdefined-ext.ll index 05819c32d522db..e08acd20cc2ace 100644 --- a/llvm/test/Transforms/SCCP/overdefined-ext.ll +++ b/llvm/test/Transforms/SCCP/overdefined-ext.ll @@ -26,7 +26,7 @@ define i1 @zext_icmp(i1 %t0) { define <2 x i1> @zext_vector(<2 x i1> %t0) { ; CHECK-LABEL: @zext_vector( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], +; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[T2]] ; %t1 = zext <2 x i1> %t0 to <2 x i32> @@ -37,7 +37,7 @@ define <2 x i1> @zext_vector(<2 x i1> %t0) { define <2 x i1> @zext_vector2(<2 x i1> %t0) { ; CHECK-LABEL: @zext_vector2( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <2 x i32> [[T1]], +; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <2 x i32> [[T1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %t1 = zext <2 x i1> %t0 to <2 x i32> @@ -74,7 +74,7 @@ define i1 @sext_icmp(i1 %t0) { define <2 x i1> @sext_vector(<2 x i1> %t0) { ; CHECK-LABEL: @sext_vector( ; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], +; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> [[T2]] ; %t1 = sext <2 x i1> %t0 to <2 x i32> @@ -85,7 +85,7 @@ define <2 x i1> @sext_vector(<2 x i1> %t0) { define <2 x i1> @sext_vector2(<2 x i1> %t0) { ; CHECK-LABEL: @sext_vector2( ; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = add nsw <2 x i32> [[T1]], +; CHECK-NEXT: [[T2:%.*]] = add nsw <2 x i32> [[T1]], splat (i32 2) ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %t1 = sext <2 x i1> %t0 to <2 x i32> diff --git a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll index 92d84f71bd9d46..f968ef6509748b 100644 --- a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll +++ b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll @@ -47,7 +47,7 @@ define <4 x i16> @range_from_and_nuw_vec(<4 x i32> %a) { ; CHECK-LABEL: define <4 x i16> @range_from_and_nuw_vec( ; CHECK-SAME: <4 x i32> [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A]], +; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A]], splat (i32 65535) ; CHECK-NEXT: [[TRUNC1:%.*]] = trunc nuw <4 x i32> [[AND1]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[TRUNC1]] ; diff --git a/llvm/test/Transforms/SCCP/vector-bitcast.ll b/llvm/test/Transforms/SCCP/vector-bitcast.ll index bed5c02d73bc51..725e3bc690ebae 100644 --- a/llvm/test/Transforms/SCCP/vector-bitcast.ll +++ b/llvm/test/Transforms/SCCP/vector-bitcast.ll @@ -12,8 +12,8 @@ define void @foo(ptr %p) nounwind { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]] ; CHECK: while.body.i: -; CHECK-NEXT: [[VWORKEXPONENT_I_033:%.*]] = phi <4 x i32> [ [[SUB_I_I:%.*]], [[WHILE_BODY_I]] ], [ , [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SUB_I_I]] = add <4 x i32> [[VWORKEXPONENT_I_033]], +; CHECK-NEXT: [[VWORKEXPONENT_I_033:%.*]] = phi <4 x i32> [ [[SUB_I_I:%.*]], [[WHILE_BODY_I]] ], [ splat (i32 939524096), [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SUB_I_I]] = add <4 x i32> [[VWORKEXPONENT_I_033]], splat (i32 -8388608) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[SUB_I_I]] to <2 x i64> ; CHECK-NEXT: store volatile <2 x i64> zeroinitializer, ptr [[P:%.*]], align 16 ; CHECK-NEXT: br label [[WHILE_BODY_I]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll index e972955e26cb47..252fe7361f07c6 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll @@ -473,7 +473,7 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(<4 x i32> %a) define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(<4 x i32> %a) ; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const( ; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], splat (i32 5) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; { @@ -495,7 +495,7 @@ define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(<4 x i32> %a) define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2(<4 x i32> %a) ; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2( ; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], splat (i32 4) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; { diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll index 0c6a976b51a1d7..2b5ee59aeb1638 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll @@ -12,8 +12,8 @@ define i16 @foo(i16 %in1, i16 %in2) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw <2 x i64> [[TMP5]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP9]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i64> [[TMP9]], splat (i64 65535) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[TMP12]], splat (i64 65533) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: [[ZEXT3_1:%.*]] = zext i1 [[TMP8]] to i16 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll index e60e356e5cd819..f9953df6c1735d 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll @@ -33,8 +33,8 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, ptr [[PA_ADDR_0271]], i64 4 ; CHECK-NEXT: [[ADD_PTR8]] = getelementptr inbounds i32, ptr [[PB_ADDR_0270]], i64 4 ; CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[TMP5]], [[TMP4]] -; CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[TMP4]], -; CHECK-NEXT: [[NOT_I242:%.*]] = xor <4 x i32> [[TMP5]], +; CHECK-NEXT: [[NOT_I:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) +; CHECK-NEXT: [[NOT_I242:%.*]] = xor <4 x i32> [[TMP5]], splat (i32 -1) ; CHECK-NEXT: [[AND_I243:%.*]] = and <4 x i32> [[NOT_I242]], [[NOT_I]] ; CHECK-NEXT: [[AND_I245:%.*]] = and <4 x i32> [[TMP4]], [[NOT_I242]] ; CHECK-NEXT: [[AND_I247:%.*]] = and <4 x i32> [[TMP5]], [[NOT_I]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll index 45030a0965e006..ec9c01c99256b0 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll @@ -20,9 +20,9 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i64 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i64 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i64 3 -; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 15) +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], splat (i32 65537) +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], splat (i32 65535) ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll index 8987f34e561e6f..7ae336e2ccee98 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -16,7 +16,7 @@ define void @PR28330(i32 %n) { ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; DEFAULT-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] ; DEFAULT-NEXT: br label [[FOR_BODY]] @@ -28,7 +28,7 @@ define void @PR28330(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; GATHER-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; GATHER-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] ; GATHER-NEXT: br label [[FOR_BODY]] @@ -40,7 +40,7 @@ define void @PR28330(i32 %n) { ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: ; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[P17]] ; MAX-COST-NEXT: br label [[FOR_BODY]] @@ -93,7 +93,7 @@ define void @PR32038(i32 %n) { ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; DEFAULT-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 ; DEFAULT-NEXT: br label [[FOR_BODY]] @@ -105,7 +105,7 @@ define void @PR32038(i32 %n) { ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; GATHER-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; GATHER-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; GATHER-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 ; GATHER-NEXT: br label [[FOR_BODY]] @@ -117,7 +117,7 @@ define void @PR32038(i32 %n) { ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: ; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> +; MAX-COST-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80) ; MAX-COST-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; MAX-COST-NEXT: [[OP_RDX]] = add i32 [[TMP3]], -5 ; MAX-COST-NEXT: br label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll index 90a30808aeeba9..a5371cd051a5b9 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll @@ -20,7 +20,7 @@ define void @f_noalias(ptr noalias nocapture %dst, ptr noalias nocapture readonl ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], splat (i32 256) ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i1> [[TMP11]] to <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP9]], <4 x i32> [[TMP12]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 5f0b16048d40c8..7622f9bc5c41d9 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -1257,7 +1257,7 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], +; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16) ; CHECK-NEXT: [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]] ; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = add nsw <16 x i32> [[TMP43]], [[TMP44]] @@ -1275,9 +1275,9 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = lshr <16 x i32> [[TMP59]], -; CHECK-NEXT: [[TMP61:%.*]] = and <16 x i32> [[TMP60]], -; CHECK-NEXT: [[TMP62:%.*]] = mul nuw <16 x i32> [[TMP61]], +; CHECK-NEXT: [[TMP60:%.*]] = lshr <16 x i32> [[TMP59]], splat (i32 15) +; CHECK-NEXT: [[TMP61:%.*]] = and <16 x i32> [[TMP60]], splat (i32 65537) +; CHECK-NEXT: [[TMP62:%.*]] = mul nuw <16 x i32> [[TMP61]], splat (i32 65535) ; CHECK-NEXT: [[TMP63:%.*]] = add <16 x i32> [[TMP62]], [[TMP59]] ; CHECK-NEXT: [[TMP64:%.*]] = xor <16 x i32> [[TMP63]], [[TMP62]] ; CHECK-NEXT: [[TMP65:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP64]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll index 8f6d5d8f2d7ec8..9f5744b17cb79e 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll @@ -325,7 +325,7 @@ define void @no_version(ptr nocapture %dst, ptr nocapture readonly %src) { ; CHECK-LABEL: @no_version( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[TMP0]], splat (i32 16) ; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -1242,13 +1242,13 @@ define void @crash_no_tracked_instructions(ptr %arg, ptr %arg.2, ptr %arg.3, i1 ; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], ; CHECK-NEXT: store float [[T26]], ptr [[T25]], align 4 ; CHECK-NEXT: [[T27:%.*]] = load float, ptr [[ARG_2:%.*]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP3]], splat (float 2.000000e+01) ; CHECK-NEXT: br label [[BB30]] ; CHECK: bb30: ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x float> [ [[TMP4]], [[BB22]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[BB36:%.*]] ; CHECK: bb36: -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[TMP5]], splat (float 3.000000e+00) ; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[ARG_3]], align 4 ; CHECK-NEXT: br label [[BB41:%.*]] ; CHECK: bb41: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll index 0c3c695a308cd4..476068dcd04969 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll @@ -9,7 +9,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll index 1bd63b79b0f5ca..f79db7d7ad0cbd 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -200,9 +200,9 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15) +; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537) +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], splat (i32 65535) ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll index f99f6ecd333823..1330e5557e559b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -200,9 +200,9 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15) +; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537) +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], splat (i32 65535) ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll index 0eda2cbc862ff0..9910090d43eae9 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll @@ -13,7 +13,7 @@ define dso_local void @l() local_unnamed_addr { ; CHECK: bb3: ; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32 ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], splat (i16 8) ; CHECK-NEXT: br label [[BB25]] ; CHECK: bb11: ; CHECK-NEXT: [[I12:%.*]] = zext i1 undef to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll index 7b27489782fc46..6e596fccd18f39 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll @@ -8,7 +8,7 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <15 x i8>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], splat (i8 10) ; NON-POW2-NEXT: store <15 x i8> [[TMP1]], ptr [[DST]], align 1 ; NON-POW2-NEXT: ret void ; @@ -17,12 +17,12 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) { ; POW2-ONLY-NEXT: entry: ; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], splat (i8 10) ; POW2-ONLY-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8 ; POW2-ONLY-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4 -; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], +; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], splat (i8 10) ; POW2-ONLY-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12 ; POW2-ONLY-NEXT: [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll index c18811a35c1eeb..feb4ad865f3147 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll @@ -7,7 +7,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], splat (i32 10) ; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -18,7 +18,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], splat (i32 10) ; POW2-ONLY-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 @@ -160,7 +160,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4 ; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]] -; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], +; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], splat (i32 9) ; NON-POW2-NEXT: store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -177,7 +177,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4 ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]] -; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], +; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], splat (i32 9) ; POW2-ONLY-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4 @@ -221,7 +221,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01) ; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -232,7 +232,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01) ; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll index c9b2e0ffc15f3d..2d94babb568742 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll @@ -568,8 +568,8 @@ define void @can_reorder_vec3_op_with_padding(ptr %A, <3 x float> %in) { ; NON-POW2-SAME: ptr [[A:%.*]], <3 x float> [[IN:%.*]]) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[TMP1:%.*]] = fsub <3 x float> [[IN]], [[IN]] -; NON-POW2-NEXT: [[TMP2:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> , <3 x float> ) -; NON-POW2-NEXT: [[TMP3:%.*]] = fmul <3 x float> [[TMP2]], +; NON-POW2-NEXT: [[TMP2:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> splat (float 2.000000e+00), <3 x float> splat (float 3.000000e+00)) +; NON-POW2-NEXT: [[TMP3:%.*]] = fmul <3 x float> [[TMP2]], splat (float 3.000000e+00) ; NON-POW2-NEXT: [[TMP4:%.*]] = shufflevector <3 x float> [[TMP3]], <3 x float> poison, <3 x i32> ; NON-POW2-NEXT: store <3 x float> [[TMP4]], ptr [[A]], align 4 ; NON-POW2-NEXT: ret void @@ -584,8 +584,8 @@ define void @can_reorder_vec3_op_with_padding(ptr %A, <3 x float> %in) { ; POW2-ONLY-NEXT: [[MUL6_I_I_I_I:%.*]] = fmul float [[TMP1]], 3.000000e+00 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = shufflevector <3 x float> [[IN]], <3 x float> poison, <2 x i32> ; POW2-ONLY-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP2]], [[TMP2]] -; POW2-ONLY-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> , <2 x float> ) -; POW2-ONLY-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP4]], +; POW2-ONLY-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> splat (float 2.000000e+00), <2 x float> splat (float 3.000000e+00)) +; POW2-ONLY-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP4]], splat (float 3.000000e+00) ; POW2-ONLY-NEXT: store <2 x float> [[TMP5]], ptr [[A]], align 4 ; POW2-ONLY-NEXT: store float [[MUL6_I_I_I_I]], ptr [[ARRAYIDX42_I]], align 4 ; POW2-ONLY-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll index ddeaff9327e3d2..77371c2ac7b1cd 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll @@ -9,8 +9,8 @@ define void @select_umin_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_umin_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -68,8 +68,8 @@ define void @select_umin_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_umin_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -104,10 +104,10 @@ define void @select_ule_ugt_mix_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_ule_ugt_mix_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -144,8 +144,8 @@ define void @select_umin_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_umin_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -169,8 +169,8 @@ define void @select_umin_ule_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_umin_ule_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -228,8 +228,8 @@ define void @select_umin_ule_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_umin_ule_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -266,8 +266,8 @@ define void @select_umin_ule_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_umin_ule_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -290,8 +290,8 @@ define void @select_smin_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_smin_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -349,8 +349,8 @@ define void @select_smin_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_smin_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -387,8 +387,8 @@ define void @select_smin_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_smin_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -411,8 +411,8 @@ define void @select_smin_sle_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_smin_sle_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -470,8 +470,8 @@ define void @select_smin_sle_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_smin_sle_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -508,8 +508,8 @@ define void @select_smin_sle_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_smin_sle_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -531,8 +531,8 @@ define void @select_umax_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_umax_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -590,8 +590,8 @@ define void @select_umax_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_umax_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -628,8 +628,8 @@ define void @select_umax_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_umax_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -652,8 +652,8 @@ define void @select_umax_uge_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_umax_uge_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -711,8 +711,8 @@ define void @select_umax_uge_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_umax_uge_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -749,8 +749,8 @@ define void @select_umax_uge_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_umax_uge_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -773,8 +773,8 @@ define void @select_smax_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_smax_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -832,8 +832,8 @@ define void @select_smax_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_smax_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -870,8 +870,8 @@ define void @select_smax_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_smax_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -895,8 +895,8 @@ define void @select_smax_sge_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_smax_sge_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], splat (i16 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383) ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; @@ -954,8 +954,8 @@ define void @select_smax_sge_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_smax_sge_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], splat (i32 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -992,8 +992,8 @@ define void @select_smax_sge_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_smax_sge_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], splat (i64 16383) +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383) ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll index 0fe4e6a5aa28b5..f9e415a3cefc13 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -107,7 +107,7 @@ define void @select_uniform_ugt_7xi8(ptr %ptr, i8 %x) { ; CHECK-LABEL: @select_uniform_ugt_7xi8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i8> [[TMP0]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP0]], <4 x i8> [[TMP3]] @@ -181,7 +181,7 @@ define void @select_uniform_ugt_8xi8(ptr %ptr, i8 %x) { ; CHECK-LABEL: @select_uniform_ugt_8xi8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <8 x i8> [[TMP0]], splat (i8 -1) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i8> [[TMP0]], <8 x i8> [[TMP3]] @@ -258,7 +258,7 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[L_11]], i32 11 ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12) -; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1) ; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP8]], <8 x i8> [[TMP0]], i64 0) ; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP10]], <4 x i8> [[TMP3]], i64 12) ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0 @@ -371,7 +371,7 @@ define void @select_uniform_ugt_4xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_uniform_ugt_4xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i16> [[TMP0]], splat (i16 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP0]], <4 x i16> [[TMP3]] @@ -409,7 +409,7 @@ define void @select_uniform_ult_8xi16(ptr %ptr, i16 %x) { ; CHECK-LABEL: @select_uniform_ult_8xi16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <8 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <8 x i16> [[TMP0]], splat (i16 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> poison, i16 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP0]], <8 x i16> [[TMP3]] @@ -470,7 +470,7 @@ define void @select_uniform_eq_2xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_uniform_eq_2xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP0]], splat (i32 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]] @@ -496,7 +496,7 @@ define void @select_uniform_eq_4xi32(ptr %ptr, i32 %x) { ; CHECK-LABEL: @select_uniform_eq_4xi32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[TMP0]], splat (i32 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP0]], <4 x i32> [[TMP3]] @@ -533,7 +533,7 @@ define void @select_uniform_ne_2xi64(ptr %ptr, i64 %x) { ; CHECK-LABEL: @select_uniform_ne_2xi64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> [[TMP0]], splat (i64 16383) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP0]], <2 x i64> [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll index a122d84629c1a2..b8bf38af3668dd 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll @@ -14,10 +14,10 @@ define void @PR50256(ptr %a, ptr %b, i32 %n) { ; CHECK-NEXT: [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i16> [[TMP3]], splat (i16 8) ; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_8]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[TMP7]] to <8 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw <8 x i16> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw <8 x i16> [[TMP8]], splat (i16 8) ; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[B]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr [[ARRAYIDX3_8]], align 2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll index 5132e491811691..13773bf901b9bf 100644 --- a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll +++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll @@ -11,8 +11,8 @@ define void @fusion(ptr noalias nocapture align 256 dereferenceable(19267584) %a ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[TMP11]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x half> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <2 x half> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x half> [[TMP1]], splat (half 0xH5380) +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <2 x half> [[TMP2]], splat (half 0xH57F0) ; CHECK-NEXT: store <2 x half> [[TMP3]], ptr [[TMP16]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 8c6b92b65ae050..912d60d0cc3867 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -46,7 +46,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP50:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = sub <2 x i32> [[TMP26]], [[TMP50]] -; CHECK-NEXT: [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], +; CHECK-NEXT: [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], splat (i32 16) ; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]] ; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32> @@ -59,7 +59,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP40]] to <2 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = sub <2 x i32> [[TMP39]], [[TMP61]] -; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], +; CHECK-NEXT: [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], splat (i32 16) ; CHECK-NEXT: [[TMP42:%.*]] = add <2 x i32> [[TMP37]], [[TMP35]] ; CHECK-NEXT: [[TMP43:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]] ; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]] @@ -77,13 +77,13 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP54:%.*]] = load <2 x i8>, ptr null, align 1 ; CHECK-NEXT: [[TMP55:%.*]] = zext <2 x i8> [[TMP54]] to <2 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = sub <2 x i32> [[TMP62]], [[TMP55]] -; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> , i32 2) +; CHECK-NEXT: [[TMP41:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: [[TMP58:%.*]] = zext <2 x i8> [[TMP41]] to <2 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <2 x i32> [[TMP58]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 ; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP63]] to <2 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = sub <2 x i32> [[TMP60]], [[TMP76]] -; CHECK-NEXT: [[TMP46:%.*]] = shl <2 x i32> [[TMP45]], +; CHECK-NEXT: [[TMP46:%.*]] = shl <2 x i32> [[TMP45]], splat (i32 16) ; CHECK-NEXT: [[TMP90:%.*]] = add <2 x i32> [[TMP46]], [[TMP59]] ; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 ; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 @@ -93,12 +93,12 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP82:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 ; CHECK-NEXT: [[TMP91:%.*]] = zext <2 x i8> [[TMP82]] to <2 x i32> ; CHECK-NEXT: [[TMP65:%.*]] = sub <2 x i32> [[TMP79]], [[TMP91]] -; CHECK-NEXT: [[TMP75:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP75:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) ; CHECK-NEXT: [[TMP98:%.*]] = zext <2 x i8> [[TMP75]] to <2 x i32> ; CHECK-NEXT: [[TMP100:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 ; CHECK-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP100]] to <2 x i32> ; CHECK-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP98]], [[TMP103]] -; CHECK-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], +; CHECK-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16) ; CHECK-NEXT: [[TMP74:%.*]] = add <2 x i32> [[TMP70]], [[TMP65]] ; CHECK-NEXT: [[TMP78:%.*]] = extractelement <2 x i32> [[TMP90]], i32 0 ; CHECK-NEXT: [[TMP71:%.*]] = extractelement <2 x i32> [[TMP90]], i32 1 @@ -160,7 +160,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP109:%.*]] = shufflevector <4 x i8> [[TMP150]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP89:%.*]] = zext <2 x i8> [[TMP109]] to <2 x i32> ; CHECK-NEXT: [[TMP87:%.*]] = sub <2 x i32> [[TMP108]], [[TMP89]] -; CHECK-NEXT: [[TMP88:%.*]] = shl <2 x i32> [[TMP87]], +; CHECK-NEXT: [[TMP88:%.*]] = shl <2 x i32> [[TMP87]], splat (i32 16) ; CHECK-NEXT: [[TMP112:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP120:%.*]] = zext <2 x i8> [[TMP112]] to <2 x i32> ; CHECK-NEXT: [[TMP94:%.*]] = shufflevector <4 x i8> [[TMP149]], <4 x i8> poison, <2 x i32> @@ -168,7 +168,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP131:%.*]] = shufflevector <4 x i8> [[TMP150]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP132:%.*]] = zext <2 x i8> [[TMP131]] to <2 x i32> ; CHECK-NEXT: [[TMP95:%.*]] = sub <2 x i32> [[TMP128]], [[TMP132]] -; CHECK-NEXT: [[TMP96:%.*]] = shl <2 x i32> [[TMP95]], +; CHECK-NEXT: [[TMP96:%.*]] = shl <2 x i32> [[TMP95]], splat (i32 16) ; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1 ; CHECK-NEXT: [[TMP117:%.*]] = sub <2 x i32> [[TMP97]], [[TMP120]] ; CHECK-NEXT: [[TMP105:%.*]] = add <2 x i32> [[TMP96]], [[TMP117]] @@ -204,7 +204,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP118:%.*]] = shufflevector <4 x i8> [[TMP159]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP118]] to <2 x i32> ; CHECK-NEXT: [[TMP124:%.*]] = sub <2 x i32> [[TMP115]], [[TMP134]] -; CHECK-NEXT: [[TMP125:%.*]] = shl <2 x i32> [[TMP124]], +; CHECK-NEXT: [[TMP125:%.*]] = shl <2 x i32> [[TMP124]], splat (i32 16) ; CHECK-NEXT: [[TMP127:%.*]] = shufflevector <4 x i8> [[TMP158]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP191:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32> ; CHECK-NEXT: [[TMP160:%.*]] = shufflevector <4 x i8> [[TMP121]], <4 x i8> poison, <2 x i32> @@ -212,7 +212,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP171:%.*]] = shufflevector <4 x i8> [[TMP159]], <4 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP172:%.*]] = zext <2 x i8> [[TMP171]] to <2 x i32> ; CHECK-NEXT: [[TMP135:%.*]] = sub <2 x i32> [[TMP161]], [[TMP172]] -; CHECK-NEXT: [[TMP136:%.*]] = shl <2 x i32> [[TMP135]], +; CHECK-NEXT: [[TMP136:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16) ; CHECK-NEXT: [[TMP137:%.*]] = insertelement <2 x i32> [[TMP110]], i32 [[CONV33_1]], i32 1 ; CHECK-NEXT: [[TMP173:%.*]] = sub <2 x i32> [[TMP137]], [[TMP191]] ; CHECK-NEXT: [[TMP174:%.*]] = add <2 x i32> [[TMP136]], [[TMP173]] @@ -235,9 +235,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[SUB47_1]], 15 ; CHECK-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 ; CHECK-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; CHECK-NEXT: [[TMP194:%.*]] = lshr <2 x i32> [[TMP110]], -; CHECK-NEXT: [[TMP154:%.*]] = and <2 x i32> [[TMP194]], -; CHECK-NEXT: [[TMP195:%.*]] = mul <2 x i32> [[TMP154]], +; CHECK-NEXT: [[TMP154:%.*]] = lshr <2 x i32> [[TMP110]], splat (i32 15) +; CHECK-NEXT: [[TMP184:%.*]] = and <2 x i32> [[TMP154]], splat (i32 65537) +; CHECK-NEXT: [[TMP195:%.*]] = mul <2 x i32> [[TMP184]], splat (i32 65535) ; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD55]] ; CHECK-NEXT: [[SUB86:%.*]] = sub i32 [[ADD55]], [[ADD48_1]] ; CHECK-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] @@ -312,9 +312,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] ; CHECK-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_4]], [[ADD105_3]] ; CHECK-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV1]] -; CHECK-NEXT: [[TMP184:%.*]] = lshr <2 x i32> [[TMP102]], -; CHECK-NEXT: [[TMP185:%.*]] = and <2 x i32> [[TMP184]], -; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP185]], +; CHECK-NEXT: [[TMP185:%.*]] = lshr <2 x i32> [[TMP102]], splat (i32 15) +; CHECK-NEXT: [[TMP193:%.*]] = and <2 x i32> [[TMP185]], splat (i32 65537) +; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP193]], splat (i32 65535) ; CHECK-NEXT: [[TMP187:%.*]] = add <2 x i32> [[TMP186]], [[TMP183]] ; CHECK-NEXT: [[TMP188:%.*]] = xor <2 x i32> [[TMP187]], [[TMP102]] ; CHECK-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 @@ -373,7 +373,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP27:%.*]] = zext <2 x i8> [[TMP26]] to <2 x i32> ; THR15-NEXT: [[TMP28:%.*]] = sub <2 x i32> [[TMP25]], [[TMP27]] -; THR15-NEXT: [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], +; THR15-NEXT: [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], splat (i32 16) ; THR15-NEXT: [[TMP59:%.*]] = add <2 x i32> [[TMP29]], [[TMP23]] ; THR15-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP32:%.*]] = zext <2 x i8> [[TMP31]] to <2 x i32> @@ -386,7 +386,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32> ; THR15-NEXT: [[TMP40:%.*]] = sub <2 x i32> [[TMP37]], [[TMP39]] -; THR15-NEXT: [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], +; THR15-NEXT: [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], splat (i32 16) ; THR15-NEXT: [[TMP76:%.*]] = add <2 x i32> [[TMP41]], [[TMP35]] ; THR15-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP76]], [[TMP59]] ; THR15-NEXT: [[TMP42:%.*]] = sub <2 x i32> [[TMP59]], [[TMP76]] @@ -404,13 +404,13 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP50:%.*]] = load <2 x i8>, ptr null, align 1 ; THR15-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP50]] to <2 x i32> ; THR15-NEXT: [[TMP52:%.*]] = sub <2 x i32> [[TMP49]], [[TMP51]] -; THR15-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> , i32 2) +; THR15-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) ; THR15-NEXT: [[TMP54:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> ; THR15-NEXT: [[TMP77:%.*]] = shufflevector <2 x i32> [[TMP54]], <2 x i32> poison, <2 x i32> ; THR15-NEXT: [[TMP55:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1 ; THR15-NEXT: [[TMP56:%.*]] = zext <2 x i8> [[TMP55]] to <2 x i32> ; THR15-NEXT: [[TMP57:%.*]] = sub <2 x i32> [[TMP77]], [[TMP56]] -; THR15-NEXT: [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], +; THR15-NEXT: [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], splat (i32 16) ; THR15-NEXT: [[TMP72:%.*]] = add <2 x i32> [[TMP58]], [[TMP52]] ; THR15-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 ; THR15-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 @@ -420,12 +420,12 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP62:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_3]], align 1 ; THR15-NEXT: [[TMP63:%.*]] = zext <2 x i8> [[TMP62]] to <2 x i32> ; THR15-NEXT: [[TMP64:%.*]] = sub <2 x i32> [[TMP61]], [[TMP63]] -; THR15-NEXT: [[TMP65:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> , <2 x i8> poison) +; THR15-NEXT: [[TMP65:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> zeroinitializer, i32 1, <2 x i1> splat (i1 true), <2 x i8> poison) ; THR15-NEXT: [[TMP66:%.*]] = zext <2 x i8> [[TMP65]] to <2 x i32> ; THR15-NEXT: [[TMP67:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1 ; THR15-NEXT: [[TMP68:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32> ; THR15-NEXT: [[TMP69:%.*]] = sub <2 x i32> [[TMP66]], [[TMP68]] -; THR15-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], +; THR15-NEXT: [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16) ; THR15-NEXT: [[TMP73:%.*]] = add <2 x i32> [[TMP70]], [[TMP64]] ; THR15-NEXT: [[TMP74:%.*]] = extractelement <2 x i32> [[TMP72]], i32 0 ; THR15-NEXT: [[TMP75:%.*]] = extractelement <2 x i32> [[TMP72]], i32 1 @@ -487,7 +487,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP99:%.*]] = zext <2 x i8> [[TMP98]] to <2 x i32> ; THR15-NEXT: [[TMP100:%.*]] = sub <2 x i32> [[TMP97]], [[TMP99]] -; THR15-NEXT: [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], +; THR15-NEXT: [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], splat (i32 16) ; THR15-NEXT: [[TMP102:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP103:%.*]] = zext <2 x i8> [[TMP102]] to <2 x i32> ; THR15-NEXT: [[TMP104:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> @@ -495,7 +495,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP106:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP107:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32> ; THR15-NEXT: [[TMP108:%.*]] = sub <2 x i32> [[TMP105]], [[TMP107]] -; THR15-NEXT: [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], +; THR15-NEXT: [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], splat (i32 16) ; THR15-NEXT: [[TMP110:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV33]], i32 1 ; THR15-NEXT: [[TMP111:%.*]] = sub <2 x i32> [[TMP110]], [[TMP103]] ; THR15-NEXT: [[TMP112:%.*]] = add <2 x i32> [[TMP109]], [[TMP111]] @@ -531,7 +531,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP129:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP130:%.*]] = zext <2 x i8> [[TMP129]] to <2 x i32> ; THR15-NEXT: [[TMP131:%.*]] = sub <2 x i32> [[TMP128]], [[TMP130]] -; THR15-NEXT: [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], +; THR15-NEXT: [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], splat (i32 16) ; THR15-NEXT: [[TMP138:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP139:%.*]] = zext <2 x i8> [[TMP138]] to <2 x i32> ; THR15-NEXT: [[TMP154:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> @@ -539,7 +539,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> ; THR15-NEXT: [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32> ; THR15-NEXT: [[TMP135:%.*]] = sub <2 x i32> [[TMP155]], [[TMP134]] -; THR15-NEXT: [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], +; THR15-NEXT: [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16) ; THR15-NEXT: [[TMP140:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV33_1]], i32 1 ; THR15-NEXT: [[TMP141:%.*]] = sub <2 x i32> [[TMP140]], [[TMP139]] ; THR15-NEXT: [[TMP171:%.*]] = add <2 x i32> [[TMP170]], [[TMP141]] @@ -562,9 +562,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[SHR_I54_1:%.*]] = lshr i32 [[TMP151]], 15 ; THR15-NEXT: [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537 ; THR15-NEXT: [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535 -; THR15-NEXT: [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], -; THR15-NEXT: [[TMP157:%.*]] = and <2 x i32> [[TMP156]], -; THR15-NEXT: [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], +; THR15-NEXT: [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], splat (i32 15) +; THR15-NEXT: [[TMP157:%.*]] = and <2 x i32> [[TMP156]], splat (i32 65537) +; THR15-NEXT: [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], splat (i32 65535) ; THR15-NEXT: [[ADD78:%.*]] = add i32 [[ADD48_2]], [[ADD48]] ; THR15-NEXT: [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_2]] ; THR15-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] @@ -639,9 +639,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]] ; THR15-NEXT: [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_3]] ; THR15-NEXT: [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]] -; THR15-NEXT: [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], -; THR15-NEXT: [[TMP180:%.*]] = and <2 x i32> [[TMP179]], -; THR15-NEXT: [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], +; THR15-NEXT: [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], splat (i32 15) +; THR15-NEXT: [[TMP180:%.*]] = and <2 x i32> [[TMP179]], splat (i32 65537) +; THR15-NEXT: [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], splat (i32 65535) ; THR15-NEXT: [[TMP182:%.*]] = add <2 x i32> [[TMP181]], [[TMP178]] ; THR15-NEXT: [[TMP183:%.*]] = xor <2 x i32> [[TMP182]], [[TMP93]] ; THR15-NEXT: [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll index 85e0b183956d86..da8cd38e3108d7 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll @@ -62,7 +62,7 @@ define void @fp_sub(ptr %dst, ptr %p) { ; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; CHECK-NEXT: ret void ; @@ -70,7 +70,7 @@ define void @fp_sub(ptr %dst, ptr %p) { ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00) ; DEFAULT-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; @@ -156,7 +156,7 @@ define void @fp_div(ptr %dst, ptr %p) { ; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01) ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; CHECK-NEXT: ret void ; @@ -164,7 +164,7 @@ define void @fp_div(ptr %dst, ptr %p) { ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01) ; DEFAULT-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; @@ -254,7 +254,7 @@ define void @fp_min(ptr %dst, ptr %p) { ; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00)) ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; CHECK-NEXT: ret void ; @@ -262,7 +262,7 @@ define void @fp_min(ptr %dst, ptr %p) { ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> ) +; DEFAULT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00)) ; DEFAULT-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll index f07b6bbe8d6621..9269a710c61d3b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll @@ -9,8 +9,8 @@ define void @test(ptr %c) { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> , <8 x i8> poison) -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP3]], i32 1, <8 x i1> , <8 x i8> poison) +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> splat (i1 true), <8 x i8> poison) +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP3]], i32 1, <8 x i1> splat (i1 true), <8 x i8> poison) ; CHECK-NEXT: br label %[[FOR_COND:.*]] ; CHECK: [[FOR_COND]]: ; CHECK-NEXT: [[A_PROMOTED2226:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FOR_COND]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll index 3872cb0c3a340c..fc06866c1e9553 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll @@ -12,7 +12,7 @@ define void @f(ptr %dest, i64 %i) { ; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[P1]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -46,7 +46,7 @@ define void @g(ptr %dest, i64 %i) { ; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 2048 -; CHECK-NEXT: store <4 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[P1]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -77,7 +77,7 @@ define void @h(ptr %dest, i32 %i) { ; CHECK-SAME: (ptr [[DEST:%.*]], i32 [[I:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr [4 x i32], ptr [[DEST]], i32 [[I]], i32 0 -; CHECK-NEXT: store <4 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[P1]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll index 747a1ae2b5ce4c..d9a9e9bf1a7736 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll @@ -7,14 +7,14 @@ define void @vec_add(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_add( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_add( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -36,14 +36,14 @@ define void @vec_sub(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_sub( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], splat (i16 17) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_sub( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], splat (i16 17) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -65,14 +65,14 @@ define void @vec_rsub(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_rsub( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> , [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> splat (i16 29), [[TMP0]] ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_rsub( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> , [[TMP0]] +; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> splat (i16 29), [[TMP0]] ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -94,14 +94,14 @@ define void @vec_mul(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_mul( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], splat (i16 7) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_mul( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], splat (i16 7) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -123,14 +123,14 @@ define void @vec_sdiv(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_sdiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], splat (i16 7) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_sdiv( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], splat (i16 7) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll index 052b461b8d7501..a6c5273dc4cd0d 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll @@ -32,14 +32,14 @@ define void @vec_add(ptr %dest, ptr %p) { ; CHECK-LABEL: @vec_add( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1) ; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @vec_add( ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1) ; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; @@ -179,12 +179,12 @@ entry: define void @splat_store_i32_one(ptr %dest) { ; CHECK-LABEL: @splat_store_i32_one( ; CHECK-NEXT: entry: -; CHECK-NEXT: store <4 x i32> , ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; ; DEFAULT-LABEL: @splat_store_i32_one( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: store <4 x i32> , ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: store <4 x i32> splat (i32 1), ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll index d6dc3bcc3354c1..e50e4863e91af3 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll @@ -10,9 +10,9 @@ define i16 @test() { ; CHECK-LABEL: define i16 @test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> , i32 4) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], splat (i16 -1) ; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[T:%.*]] = trunc i32 [[TMP5]] to i16 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll index 3982a4523af40e..7d7e09758b9221 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll @@ -47,7 +47,7 @@ define void @g(ptr %p, i1 %c) { ; CHECK-NEXT: br label [[D]] ; CHECK: d: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i8> [ , [[A]] ], [ , [[B]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 1) ; CHECK-NEXT: store <2 x i8> [[TMP2]], ptr [[P_0]], align 1 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduced-value-repeated-and-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reduced-value-repeated-and-vectorized.ll index d5e1a110c6277c..a985207fb97f44 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reduced-value-repeated-and-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reduced-value-repeated-and-vectorized.ll @@ -5,7 +5,7 @@ define void @test() { ; CHECK-LABEL: define void @test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i64(ptr align 2 null, i64 6, <4 x i1> , i32 4) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i64(ptr align 2 null, i64 6, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr null, align 2 ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i16> [[TMP0]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[TMP2]]) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll index 7771e8369b6198..c89a82dd359637 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll @@ -6,7 +6,7 @@ define i32 @test(ptr %0, ptr %1) { ; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOAD_5:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> ) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> splat (i8 1)) ; CHECK-NEXT: [[TMP3:%.*]] = sext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[LOAD_5]] ; CHECK-NEXT: ret i32 [[OP_RDX]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 4cc9a0124337d9..bc24a44cecbe39 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -149,7 +149,7 @@ entry: define i64 @red_strided_ld_16xi64(ptr %ptr) { ; CHECK-LABEL: @red_strided_ld_16xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 [[PTR:%.*]], i64 16, <16 x i1> , i32 16) +; CHECK-NEXT: [[TMP0:%.*]] = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 [[PTR:%.*]], i64 16, <16 x i1> splat (i1 true), i32 16) ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]]) ; CHECK-NEXT: ret i64 [[TMP1]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll index 4788e1ef715593..bf6e2bd91ae46f 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll @@ -17,7 +17,7 @@ define void @test() { ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr null, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> zeroinitializer, i32 4, <2 x i1> , <2 x float> poison) +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> zeroinitializer, i32 4, <2 x i1> splat (i1 true), <2 x float> poison) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> , float [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <2 x float> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i1> [[TMP6]], <2 x i1> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll index 0cf4da623a0fe9..3d00ddf89aaa3b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -6,7 +6,7 @@ define i32 @test() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[IF_END_I87:%.*]] ; CHECK: if.end.i87: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> , <4 x i64> ), i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> , <4 x i64> ), i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [ diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll index 9c1da08c64b7b7..fd3d4ab80b29cc 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll @@ -14,12 +14,12 @@ define void @test(ptr %a, i64 %0) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> , <2 x double> poison) +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison) ; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> , i32 2) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: br label %[[BB]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/scatter-vectorize-reversed.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/scatter-vectorize-reversed.ll index 98333c7b420cf0..8aa75294c4d519 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/scatter-vectorize-reversed.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/scatter-vectorize-reversed.ll @@ -9,7 +9,7 @@ define <4 x i32> @test(<2 x i64> %v, ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> poison, ptr [[P]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x ptr> [[TMP0]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, <2 x ptr> [[TMP1]], <2 x i64> [[TMP4]] -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP2]], i32 2, <2 x i1> , <2 x i16> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP2]], i32 2, <2 x i1> splat (i1 true), <2 x i16> poison) ; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP6]], <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll index 9cfc5f86cb014a..063bc0458cadc6 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll @@ -8,8 +8,8 @@ define i32 @pow2_zero_constant_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], splat (i16 1) +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> splat (i32 65536), <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]]) ; CHECK-NEXT: ret i32 [[TMP7]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll index 54b7b1192ec973..8db3a8b6ff2198 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll @@ -9,11 +9,11 @@ define <4 x i32> @test(i16 %0, i16 %1) { ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[CONV15_I:%.*]] = sext i16 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> , i32 [[CONV15_I]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP5]], <4 x i32> [[TMP7]]) -; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i32> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i32> [[TMP8]], splat (i32 65535) ; CHECK-NEXT: ret <4 x i32> [[TMP9]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll index 44d320c75fedd4..f1619c9dd034d2 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll @@ -7,8 +7,8 @@ define void @test(ptr %p, ptr noalias %s) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]] ; CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void @@ -81,8 +81,8 @@ define void @test1(ptr %p, ptr noalias %s, i32 %stride) { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[STR]], 4 -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> , i32 8) -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]] ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void @@ -162,9 +162,9 @@ define void @test2(ptr %p, ptr noalias %s, i32 %stride) { ; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STR]], -4 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]] ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void @@ -242,8 +242,8 @@ define void @test3(ptr %p, ptr noalias %s) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> , i32 8) -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -4, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -4, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]] ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll index 93f5b5e46d2c33..655db54af98ac5 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-indices.ll @@ -14,7 +14,7 @@ define void @test() { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> , i64 [[ADD_I_I62_US]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i64> zeroinitializer, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[CLASS_A:%.*]], <2 x ptr> zeroinitializer, <2 x i64> [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP2]], i32 4, <2 x i1> , <2 x i32> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP2]], i32 4, <2 x i1> splat (i1 true), <2 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 ; CHECK-NEXT: [[CMP_I_I_I_I67_US:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll index b47168c76fa2bc..bce0884e929258 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll @@ -15,7 +15,7 @@ define i16 @test() { ; CHECK-NEXT: [[PEDGE_061_I:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[WHILE_BODY_I]] ], [ null, [[ENTRY]] ] ; CHECK-NEXT: [[INCDEC_PTR_I]] = getelementptr [[S]], ptr [[PEDGE_061_I]], i64 -1 ; CHECK-NEXT: [[PPREV_0_I]] = getelementptr [[S]], ptr [[PPREV_062_I]], i64 -1 -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i64(ptr align 2 [[PPREV_0_I]], i64 4, <2 x i1> , i32 2) +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i64(ptr align 2 [[PPREV_0_I]], i64 4, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1 ; CHECK-NEXT: [[CMP_I178:%.*]] = icmp ult i16 [[TMP3]], [[TMP2]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll index 8ab57cc73e646f..d94cd59a489b34 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll @@ -5,7 +5,7 @@ define i32 @sum_of_abs(ptr noalias %a, ptr noalias %b) { ; CHECK-LABEL: define i32 @sum_of_abs ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[A]], i64 64, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[A]], i64 64, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP0]], i1 false) ; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-stores-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-stores-vectorized.ll index d4dca87bfd8e40..7a0ab8002b8c04 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-stores-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-stores-vectorized.ll @@ -9,7 +9,7 @@ define void @store_reverse(ptr %p3) { ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P3]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[TMP0]], [[TMP1]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> [[TMP2]], ptr align 8 [[ARRAYIDX2]], i64 -8, <4 x i1> , i32 4) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> [[TMP2]], ptr align 8 [[ARRAYIDX2]], i64 -8, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll index 3ca5733a6bad02..89539739ebcb9d 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll @@ -8,7 +8,7 @@ define i32 @test(i64 %v1, i64 %v2) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[V2]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 32) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP2]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll index 0c0c723e669961..744e1d86b0a9ce 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll @@ -7,9 +7,9 @@ define i32 @test() { ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> , i32 4) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], splat (i16 -1) ; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP5]], i32 1) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll index 4f855e20fac4db..21312c28891d52 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll @@ -11,18 +11,18 @@ define void @test(ptr %p, i16 %load794) { ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], splat (i32 3329) ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[ZEXT795]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP8]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[TMP12]] to <2 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], -; CHECK-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP10]], +; CHECK-NEXT: [[TMP22:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], splat (i64 5039) +; CHECK-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP22]], splat (i64 24) ; CHECK-NEXT: [[TMP13:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], splat (i32 62207) ; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = trunc <2 x i32> [[TMP21]] to <2 x i16> -; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i16> [[TMP14]], +; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i16> [[TMP14]], splat (i16 -3329) ; CHECK-NEXT: [[TMP16:%.*]] = icmp slt <2 x i16> [[TMP15]], zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP15]], <2 x i16> zeroinitializer) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll index 3a1a8fb4b2e32f..073c8847b15a1c 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll @@ -8,7 +8,7 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <15 x i8>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], splat (i8 10) ; NON-POW2-NEXT: store <15 x i8> [[TMP1]], ptr [[DST]], align 1 ; NON-POW2-NEXT: ret void ; @@ -17,17 +17,17 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) { ; POW2-ONLY-NEXT: entry: ; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], splat (i8 10) ; POW2-ONLY-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8 ; POW2-ONLY-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4 -; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], +; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], splat (i8 10) ; POW2-ONLY-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12 ; POW2-ONLY-NEXT: [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12 ; POW2-ONLY-NEXT: [[TMP4:%.*]] = load <2 x i8>, ptr [[GEP_SRC_12]], align 4 -; POW2-ONLY-NEXT: [[TMP5:%.*]] = mul nsw <2 x i8> [[TMP4]], +; POW2-ONLY-NEXT: [[TMP5:%.*]] = mul nsw <2 x i8> [[TMP4]], splat (i8 10) ; POW2-ONLY-NEXT: store <2 x i8> [[TMP5]], ptr [[DST_12]], align 1 ; POW2-ONLY-NEXT: [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14 ; POW2-ONLY-NEXT: [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll index 308d0e27f1ea89..7ab5e4d6cb787e 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll @@ -7,7 +7,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], splat (i32 10) ; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -18,7 +18,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], splat (i32 10) ; POW2-ONLY-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 @@ -56,7 +56,7 @@ define void @v3_load_i32_udiv_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = udiv <3 x i32> , [[TMP0]] +; NON-POW2-NEXT: [[TMP1:%.*]] = udiv <3 x i32> splat (i32 10), [[TMP0]] ; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -170,7 +170,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4 ; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]] -; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], +; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], splat (i32 9) ; NON-POW2-NEXT: store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -187,7 +187,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4 ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4 ; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]] -; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], +; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], splat (i32 9) ; POW2-ONLY-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4 @@ -231,7 +231,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01) ; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -242,7 +242,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01) ; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4 @@ -467,7 +467,7 @@ define i32 @reduce_add_after_mul(ptr %src) { ; NON-POW2-LABEL: @reduce_add_after_mul( ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], +; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], splat (i32 10) ; NON-POW2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP2]]) ; NON-POW2-NEXT: ret i32 [[TMP3]] ; @@ -798,7 +798,7 @@ define float @reduce_fadd_after_fmul_of_buildvec(float %a, float %b, float %c) { ; NON-POW2-NEXT: [[TMP1:%.*]] = insertelement <3 x float> poison, float [[A:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[B:%.*]], i32 1 ; NON-POW2-NEXT: [[TMP3:%.*]] = insertelement <3 x float> [[TMP2]], float [[C:%.*]], i32 2 -; NON-POW2-NEXT: [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], +; NON-POW2-NEXT: [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], splat (float 1.000000e+01) ; NON-POW2-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP4]]) ; NON-POW2-NEXT: ret float [[TMP5]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll index a7bb272b44dc4b..ca93cbd698adac 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll @@ -6,7 +6,7 @@ define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) { ; CHECK-SAME: i64 [[TMP0:%.*]], i1 [[DOTCMP_I_2:%.*]], i1 [[TMP1:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> poison, i1 [[DOTCMP_I_2]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> @@ -14,7 +14,7 @@ define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) { ; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP10]], <4 x i1> [[TMP8]] ; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i1> [[TMP11]] to <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], +; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], splat (i32 1) ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: store i32 [[TMP14]], ptr [[A]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll index 9cdb7c228e3827..9e6270376ddd43 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -16,7 +16,7 @@ define void @foo() { ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP2]], <2 x i32> [[TMP1]], i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> , i32 [[ADD277]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = ashr <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr <4 x i32> [[TMP5]], splat (i32 6) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[ARRAYIDX372]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index d8845f20ecd72e..5ebbb76f3d6c33 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -16,8 +16,8 @@ define void @test() #0 { ; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP5]], 32 -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> , [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = ashr exact <4 x i64> [[TMP6]], +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> splat (i64 1), [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = ashr exact <4 x i64> [[TMP6]], splat (i64 32) ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP7]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP8]], [[TMP4]] ; CHECK-NEXT: [[OP_RDX1]] = add i64 [[OP_RDX]], 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll index 8e878f3f8b80fa..8839fc22817888 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -171,17 +171,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2) ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2) ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[R71]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 0b17e19e4fadd8..dfa918a6ea4532 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -171,17 +171,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-LABEL: @ashr_shl_v8i32_const( ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2) ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SSE-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2) ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> -; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[R71]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll index 2c16612722c6a7..6187dd9ea910d8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll @@ -15,7 +15,7 @@ define void @and4(ptr noalias nocapture noundef writeonly %dst, ptr noalias noca ; CHECK-LABEL: @and4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i8> [[TMP0]], splat (i8 -64) ; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; CHECK-NEXT: ret void ; @@ -45,7 +45,7 @@ define void @and8(ptr noalias nocapture noundef writeonly %dst, ptr noalias noca ; CHECK-LABEL: @and8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i8> [[TMP0]], splat (i8 -64) ; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; CHECK-NEXT: ret void ; @@ -95,7 +95,7 @@ define void @and16(ptr noalias nocapture noundef writeonly %dst, ptr noalias noc ; CHECK-LABEL: @and16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], splat (i8 -64) ; CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; CHECK-NEXT: ret void ; @@ -185,19 +185,19 @@ define void @and32(ptr noalias nocapture noundef writeonly %dst, ptr noalias noc ; SSE-LABEL: @and32( ; SSE-NEXT: entry: ; SSE-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 -; SSE-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], +; SSE-NEXT: [[TMP1:%.*]] = and <16 x i8> [[TMP0]], splat (i8 -64) ; SSE-NEXT: store <16 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; SSE-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 16 ; SSE-NEXT: [[ARRAYIDX3_16:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 16 ; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX_16]], align 1 -; SSE-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], +; SSE-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], splat (i8 -64) ; SSE-NEXT: store <16 x i8> [[TMP3]], ptr [[ARRAYIDX3_16]], align 1 ; SSE-NEXT: ret void ; ; AVX-LABEL: @and32( ; AVX-NEXT: entry: ; AVX-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1 -; AVX-NEXT: [[TMP1:%.*]] = and <32 x i8> [[TMP0]], +; AVX-NEXT: [[TMP1:%.*]] = and <32 x i8> [[TMP0]], splat (i8 -64) ; AVX-NEXT: store <32 x i8> [[TMP1]], ptr [[DST:%.*]], align 1 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll index 33fd3e6dc0e09f..25652e026a8374 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll @@ -25,46 +25,46 @@ define void @sdiv_v16i32_uniformconst() { ; SSE-LABEL: @sdiv_v16i32_uniformconst( ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], +; SSE-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], +; SSE-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @sdiv_v16i32_uniformconst( ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], +; SLM-NEXT: [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], +; SLM-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @sdiv_v16i32_uniformconst( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = sdiv <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = sdiv <8 x i32> [[TMP1]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP4:%.*]] = sdiv <8 x i32> [[TMP3]], +; AVX-NEXT: [[TMP4:%.*]] = sdiv <8 x i32> [[TMP3]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @sdiv_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], +; AVX512-NEXT: [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], splat (i32 5) ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 ; AVX512-NEXT: ret void ; @@ -122,46 +122,46 @@ define void @sdiv_v16i32_uniformconst() { define void @srem_v16i32_uniformconst() { ; SSE-LABEL: @srem_v16i32_uniformconst( ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], +; SSE-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], +; SSE-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @srem_v16i32_uniformconst( ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], +; SLM-NEXT: [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], +; SLM-NEXT: [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @srem_v16i32_uniformconst( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = srem <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = srem <8 x i32> [[TMP1]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP4:%.*]] = srem <8 x i32> [[TMP3]], +; AVX-NEXT: [[TMP4:%.*]] = srem <8 x i32> [[TMP3]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @srem_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], +; AVX512-NEXT: [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], splat (i32 5) ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 ; AVX512-NEXT: ret void ; @@ -219,46 +219,46 @@ define void @srem_v16i32_uniformconst() { define void @udiv_v16i32_uniformconst() { ; SSE-LABEL: @udiv_v16i32_uniformconst( ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], +; SSE-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], +; SSE-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @udiv_v16i32_uniformconst( ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], +; SLM-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], +; SLM-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @udiv_v16i32_uniformconst( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = udiv <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = udiv <8 x i32> [[TMP1]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP4:%.*]] = udiv <8 x i32> [[TMP3]], +; AVX-NEXT: [[TMP4:%.*]] = udiv <8 x i32> [[TMP3]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @udiv_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], +; AVX512-NEXT: [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], splat (i32 5) ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 ; AVX512-NEXT: ret void ; @@ -316,46 +316,46 @@ define void @udiv_v16i32_uniformconst() { define void @urem_v16i32_uniformconst() { ; SSE-LABEL: @urem_v16i32_uniformconst( ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SSE-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SSE-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], +; SSE-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SSE-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], +; SSE-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SSE-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], +; SSE-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5) ; SSE-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SSE-NEXT: ret void ; ; SLM-LABEL: @urem_v16i32_uniformconst( ; SLM-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 -; SLM-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], +; SLM-NEXT: [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP2]], ptr @c32, align 4 ; SLM-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 -; SLM-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], +; SLM-NEXT: [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 ; SLM-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; SLM-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], +; SLM-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; SLM-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 -; SLM-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], +; SLM-NEXT: [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5) ; SLM-NEXT: store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 ; SLM-NEXT: ret void ; ; AVX-LABEL: @urem_v16i32_uniformconst( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 -; AVX-NEXT: [[TMP2:%.*]] = urem <8 x i32> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = urem <8 x i32> [[TMP1]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @c32, align 4 ; AVX-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 -; AVX-NEXT: [[TMP4:%.*]] = urem <8 x i32> [[TMP3]], +; AVX-NEXT: [[TMP4:%.*]] = urem <8 x i32> [[TMP3]], splat (i32 5) ; AVX-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 ; AVX-NEXT: ret void ; ; AVX512-LABEL: @urem_v16i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], +; AVX512-NEXT: [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], splat (i32 5) ; AVX512-NEXT: store <16 x i32> [[TMP2]], ptr @c32, align 4 ; AVX512-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll index 16977c025e3eaa..45294e581e6aea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll @@ -854,13 +854,13 @@ define void @fshl_v2i32_uniformconst() { ; ; AVX256-LABEL: @fshl_v2i32_uniformconst( ; AVX256-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 -; AVX256-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> ) +; AVX256-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; AVX256-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4 ; AVX256-NEXT: ret void ; ; AVX512-LABEL: @fshl_v2i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> ) +; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; AVX512-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4 ; AVX512-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll index 090a9daa6a1136..d2002b4eedaf40 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll @@ -854,13 +854,13 @@ define void @fshr_v2i32_uniformconst() { ; ; AVX256-LABEL: @fshr_v2i32_uniformconst( ; AVX256-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 -; AVX256-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> ) +; AVX256-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; AVX256-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4 ; AVX256-NEXT: ret void ; ; AVX512-LABEL: @fshr_v2i32_uniformconst( ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 -; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> ) +; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) ; AVX512-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4 ; AVX512-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll index d388fd17925a16..f46a5d84a86cc9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll @@ -13,7 +13,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll index 4af23a6c965100..f1b094e9bbed4a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll @@ -80,7 +80,7 @@ define i32 @ray_sphere(ptr nocapture noundef readonly %sph, ptr nocapture nounde ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP93]], i1 [[CMP94]], i1 false ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], +; CHECK-NEXT: [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0 ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1 ; CHECK-NEXT: [[OR_COND106:%.*]] = select i1 [[TMP38]], i1 true, i1 [[TMP37]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll index e8548e0467381e..860d0ed29332c2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll @@ -17,7 +17,7 @@ define void @test(ptr %0, i32 %add651) { ; CHECK-NEXT: [[ARRAYIDX660:%.*]] = getelementptr i8, ptr [[TMP4]], i64 7800 ; CHECK-NEXT: [[ARRAYIDX689:%.*]] = getelementptr i8, ptr [[TMP4]], i64 7816 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP3]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> , <2 x i32> @@ -25,7 +25,7 @@ define void @test(ptr %0, i32 %add651) { ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[ADD651]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP14:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP13]], <2 x i32> [[TMP10]], i64 2) -; CHECK-NEXT: [[TMP15:%.*]] = lshr <4 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP15:%.*]] = lshr <4 x i32> [[TMP14]], splat (i32 1) ; CHECK-NEXT: [[SHR685:%.*]] = lshr i32 [[TMP2]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i16> ; CHECK-NEXT: [[CONV686:%.*]] = trunc i32 [[SHR685]] to i16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll index 0b9a053d49204c..926cc7b906efe1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll @@ -14,7 +14,7 @@ define i32 @foo(ptr noalias nocapture %A, ptr noalias nocapture %B, double %G) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <2 x double> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[G:%.*]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> splat (double 1.000000e+00) ; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll index 133e64e5bd00d8..54a81a3ce723cf 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll @@ -12,7 +12,7 @@ define void @foo(ptr %v0, ptr readonly %v1) { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[T14]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[T142]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[TMP3]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[T222]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[TMP5]], ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[T212]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll index ae6e6723706cd9..0b6c8f3d2562b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll @@ -82,7 +82,7 @@ define float @merge_anyof_v4f32_wrong_first(<4 x float> %x) { ; CHECK-LABEL: @merge_anyof_v4f32_wrong_first( ; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP2]], [[CMP3WRONG]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[OP_RDX]], float -1.000000e+00, float 1.000000e+00 @@ -109,7 +109,7 @@ define float @merge_anyof_v4f32_wrong_last(<4 x float> %x) { ; CHECK-LABEL: @merge_anyof_v4f32_wrong_last( ; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP2]], [[CMP3WRONG]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[OP_RDX]], float -1.000000e+00, float 1.000000e+00 @@ -136,7 +136,7 @@ define i32 @merge_anyof_v4i32_wrong_middle(<4 x i32> %x) { ; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle( ; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3 ; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[X3]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 1) ; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP2]], [[CMP3WRONG]] ; CHECK-NEXT: [[R:%.*]] = select i1 [[OP_RDX]], i32 -1, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll index 4eb0be61b51c41..925b348cdeec1f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll @@ -24,11 +24,11 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %ve ; CHECK: for.body233: ; CHECK-NEXT: br i1 undef, label [[FOR_BODY233]], label [[FOR_END271]] ; CHECK: for.end271: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ , [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ splat (float 0x47EFFFFFE0000000), [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ] ; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> undef, [[TMP0]] ; CHECK-NEXT: br i1 undef, label [[IF_THEN291:%.*]], label [[RETURN]] ; CHECK: if.then291: -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 5.000000e-01) ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP0]], [[TMP2]] ; CHECK-NEXT: br i1 undef, label [[IF_END332:%.*]], label [[IF_ELSE319:%.*]] ; CHECK: if.else319: @@ -40,7 +40,7 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %ve ; CHECK: if.then329: ; CHECK-NEXT: br label [[IF_END332]] ; CHECK: if.end332: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ , [[IF_THEN291]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ splat (float 0x3F847AE140000000), [[IF_THEN291]] ] ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]] ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[VERTICES:%.*]], align 4 ; CHECK-NEXT: br label [[RETURN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index 849c1d7c192dce..988e8985b9f151 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -24,20 +24,20 @@ define void @testfunc(ptr nocapture %dest, ptr nocapture readonly %src) { ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x float> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP5]], <2 x float> -; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x float> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x float> [[TMP5]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP8]], <2 x float> [[TMP5]], <2 x float> splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP7]], splat (float -1.000000e+00) ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x float> , <2 x float> [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP15]], <2 x float> splat (float -0.000000e+00), <2 x float> [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP10]], i32 1 ; CHECK-NEXT: [[ADD13]] = fadd float [[TMP11]], [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[ADD13]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x float> [[TMP14]], <2 x float> -; CHECK-NEXT: [[TMP17:%.*]] = fcmp olt <2 x float> [[TMP16]], -; CHECK-NEXT: [[TMP18]] = select <2 x i1> [[TMP17]], <2 x float> , <2 x float> [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = fcmp olt <2 x float> [[TMP14]], splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP17]], <2 x float> [[TMP14]], <2 x float> splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP19:%.*]] = fcmp olt <2 x float> [[TMP20]], splat (float -1.000000e+00) +; CHECK-NEXT: [[TMP18]] = select <2 x i1> [[TMP19]], <2 x float> splat (float -1.000000e+00), <2 x float> [[TMP20]] ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll index 2f69a01bb5e370..95ae544e2c62f9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -15,7 +15,7 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll index 9b35fcaebadf95..1e31772b8e49ed 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -15,7 +15,7 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[ADD]], i32 0 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll index 1c0fa1d85f9e4a..a9f92f324d6f54 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll @@ -67,7 +67,7 @@ define void @SIM4() { ; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ [[TMP3]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ] ; CHECK-NEXT: br i1 false, label [[IF_THEN157:%.*]], label [[LAND_LHS_TRUE167]] ; CHECK: if.then157: -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr undef, align 4 ; CHECK-NEXT: br label [[LAND_LHS_TRUE167]] ; CHECK: land.lhs.true167: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index 1f33676c19c148..42ad20ff578c10 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -27,7 +27,7 @@ define void @main() { ; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double [[ADD_I276_US]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> , [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.400000e+02) ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], ; CHECK-NEXT: store <2 x double> [[TMP3]], ptr undef, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP1]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll index 9c8569fcbdf759..739e3964c2685f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -179,11 +179,11 @@ define i32 @foo4(ptr nocapture %A, i32 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x double> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x double> [[TMP0]], splat (double 7.900000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], splat (double 6.000000e+00) ; CHECK-NEXT: store <4 x double> [[TMP5]], ptr [[A]], align 8 ; CHECK-NEXT: ret i32 undef ; @@ -335,8 +335,8 @@ define void @cse_for_hoisted_instructions_in_preheader(ptr %dst, i32 %a, i1 %c) ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> splat (i32 22), [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], splat (i32 3) ; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: [[OR_2:%.*]] = or i32 [[A]], 3 ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 10 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll index d203888e0c6ee8..b42f36f105636e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll @@ -25,12 +25,12 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) { ; COUNT0-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> , <2 x i32> ; COUNT0-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]] ; COUNT0-NEXT: [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer -; COUNT0-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> +; COUNT0-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; COUNT0-NEXT: [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer ; COUNT0-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer ; COUNT0-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]] ; COUNT0-NEXT: [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer -; COUNT0-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> , <2 x double> zeroinitializer +; COUNT0-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer ; COUNT0-NEXT: store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8 ; COUNT0-NEXT: ret void ; @@ -135,12 +135,12 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) { ; COUNT-1-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> , <2 x i32> ; COUNT-1-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]] ; COUNT-1-NEXT: [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer -; COUNT-1-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> +; COUNT-1-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; COUNT-1-NEXT: [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer ; COUNT-1-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer ; COUNT-1-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]] ; COUNT-1-NEXT: [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer -; COUNT-1-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> , <2 x double> zeroinitializer +; COUNT-1-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer ; COUNT-1-NEXT: store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8 ; COUNT-1-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll index feeb34e2f1145b..22c1d7ce388ab5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll @@ -17,13 +17,13 @@ define void @PR28457(ptr noalias nocapture align 32 %q, ptr noalias nocapture re ; SSE-NEXT: [[Q2:%.*]] = getelementptr inbounds double, ptr [[Q:%.*]], i64 2 ; SSE-NEXT: [[Q4:%.*]] = getelementptr inbounds double, ptr [[Q]], i64 4 ; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[P]], align 8 -; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], splat (double 1.000000e+00) ; SSE-NEXT: store <2 x double> [[TMP3]], ptr [[Q]], align 8 ; SSE-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[P2]], align 8 -; SSE-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], +; SSE-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], splat (double 1.000000e+00) ; SSE-NEXT: store <2 x double> [[TMP7]], ptr [[Q2]], align 8 ; SSE-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr [[P4]], align 8 -; SSE-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], +; SSE-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], splat (double 1.000000e+00) ; SSE-NEXT: store <2 x double> [[TMP11]], ptr [[Q4]], align 8 ; SSE-NEXT: ret void ; @@ -31,10 +31,10 @@ define void @PR28457(ptr noalias nocapture align 32 %q, ptr noalias nocapture re ; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 4 ; AVX-NEXT: [[Q4:%.*]] = getelementptr inbounds double, ptr [[Q:%.*]], i64 4 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr [[P]], align 8 -; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], +; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], splat (double 1.000000e+00) ; AVX-NEXT: store <4 x double> [[TMP3]], ptr [[Q]], align 8 ; AVX-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[P4]], align 8 -; AVX-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], +; AVX-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], splat (double 1.000000e+00) ; AVX-NEXT: store <2 x double> [[TMP7]], ptr [[Q4]], align 8 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll index 3d3d00f4a0b3fd..fb25ff975adc2e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll @@ -12,8 +12,8 @@ define void @test() { ; CHECK: loop: ; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX25:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i64> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[LOOP]] ] -; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], -; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP1]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX16:%.*]] = add i64 [[TMP9]], [[TMP8]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll index 597ccd712e0a69..92157b24c6bb05 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll @@ -31,9 +31,9 @@ define double @ext_user(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 ; CHECK: for.body: ; CHECK-NEXT: [[I_020:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], -; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], splat (double 1.000000e+01) +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], splat (double 4.000000e+00) +; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]], splat (double 4.000000e+00) ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_020]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll index 1374e9873e1c53..02c3173adc654f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll @@ -12,7 +12,7 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP6]], [[M:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll b/llvm/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll index 12deea6a262ce4..f7bba85a8c4857 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fabs-cost-softfp.ll @@ -14,7 +14,7 @@ define void @vectorize_fp128(fp128 %c, fp128 %d) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x fp128> poison, fp128 [[C:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x fp128> [[TMP0]], fp128 [[D:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x fp128> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fcmp oeq <2 x fp128> [[TMP2]], splat (fp128 0xL00000000000000007FFF000000000000) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: [[OR_COND39:%.*]] = or i1 [[TMP4]], [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll index 234b658032383e..a7a92bad5e5c1f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll @@ -44,7 +44,7 @@ define void @foo(ptr %i7, i32 %0, i1 %tobool62.not) { ; CHECK-NEXT: br label [[IF_END75]] ; CHECK: if.end75: ; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP20]], [[IF_THEN74]] ], [ [[TMP21]], [[IF_END72]] ] -; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i32> [[TMP22]], +; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i32> [[TMP22]], splat (i32 1) ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = mul <4 x i32> [[TMP23]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = sitofp <4 x i32> [[TMP25]] to <4 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll index 7e75970de34929..3ac0d01cf9a2c1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll @@ -12,7 +12,7 @@ define i1 @test(i32 %g, i16 %d) { ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i8> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], splat (i8 -3) ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i8> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll index 9e43cefef2801d..12263b065d89c9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll @@ -31,7 +31,7 @@ define void @test() { ; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[COND_IN_V]], i32 0 ; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> -; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> , <4 x i64> poison) +; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> splat (i1 true), <4 x i64> poison) ; CHECK-SLP-THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[TMP3]], zeroinitializer ; CHECK-SLP-THRESHOLD-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll index 17f5abada632d8..1514fcec02bd9c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll @@ -14,7 +14,7 @@ define void @foo1 (ptr noalias %x, ptr noalias %y) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[Y:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[X:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x ptr>, ptr [[TMP1]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP4]], <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP4]], <2 x i64> splat (i64 16) ; CHECK-NEXT: store <2 x ptr> [[TMP5]], ptr [[TMP2]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll index e0d7c12f70c2ef..63b41627106e5c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll @@ -18,7 +18,7 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 1) ; CHECK-NEXT: switch i32 [[TMP4]], label [[WHILE_BODY_BACKEDGE]] [ ; CHECK-NEXT: i32 2, label [[SW_BB:%.*]] ; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]] @@ -29,13 +29,13 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2 ; CHECK-NEXT: store i32 [[TMP8]], ptr [[INCDEC_PTR1]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 2) ; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]] ; CHECK: sw.bb6: ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2 ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 2) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0 ; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index c9ff2d6426d2b6..2bcabfad3d09ba 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -34,7 +34,7 @@ define float @baz() { ; THRESHOLD-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]]) ; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0 ; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[CONV]], i32 1 -; THRESHOLD-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], +; THRESHOLD-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], splat (float 2.000000e+00) ; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index cf9f7f7376ba44..990803870ec23f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -29,7 +29,7 @@ define i32 @add_red(ptr %A, i32 %n) { ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[MUL]] ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], splat (float 7.000000e+00) ; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]]) ; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP3]] ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll index 3a35a63c159815..f0a1836e6c273f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll @@ -320,52 +320,52 @@ define i32 @foo1() local_unnamed_addr #0 { ; SSE-LABEL: @foo1( ; SSE-NEXT: entry: ; SSE-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @ib, align 16 -; SSE-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], +; SSE-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP1]], ptr @ia, align 16 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 4), align 16 -; SSE-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], +; SSE-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP3]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 4), align 16 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 8), align 16 -; SSE-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], +; SSE-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP5]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 8), align 16 ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 12), align 16 -; SSE-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], +; SSE-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP7]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 12), align 16 ; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 16), align 16 -; SSE-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], +; SSE-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 16), align 16 ; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 20), align 16 -; SSE-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], +; SSE-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP11]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 20), align 16 ; SSE-NEXT: [[TMP12:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 24), align 16 -; SSE-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], +; SSE-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP13]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 24), align 16 ; SSE-NEXT: [[TMP14:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 28), align 16 -; SSE-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], +; SSE-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP15]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 28), align 16 ; SSE-NEXT: [[TMP16:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 32), align 16 -; SSE-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], +; SSE-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP17]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 32), align 16 ; SSE-NEXT: [[TMP18:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 36), align 16 -; SSE-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], +; SSE-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP19]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 36), align 16 ; SSE-NEXT: [[TMP20:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 40), align 16 -; SSE-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], +; SSE-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP21]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 40), align 16 ; SSE-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 44), align 16 -; SSE-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], +; SSE-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP23]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 44), align 16 ; SSE-NEXT: [[TMP24:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 48), align 16 -; SSE-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], +; SSE-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP25]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 48), align 16 ; SSE-NEXT: [[TMP26:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 52), align 16 -; SSE-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], +; SSE-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP27]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 52), align 16 ; SSE-NEXT: [[TMP28:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 56), align 16 -; SSE-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], +; SSE-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP29]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 56), align 16 ; SSE-NEXT: [[TMP30:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 60), align 16 -; SSE-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], +; SSE-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], splat (i32 -1) ; SSE-NEXT: store <4 x i32> [[TMP31]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 60), align 16 ; SSE-NEXT: br label [[FOR_BODY5:%.*]] ; SSE: for.cond3: @@ -390,16 +390,16 @@ define i32 @foo1() local_unnamed_addr #0 { ; AVX512-LABEL: @foo1( ; AVX512-NEXT: entry: ; AVX512-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr @ib, align 16 -; AVX512-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], +; AVX512-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], splat (i32 -1) ; AVX512-NEXT: store <16 x i32> [[TMP1]], ptr @ia, align 16 ; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 16), align 16 -; AVX512-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], +; AVX512-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) ; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 16), align 16 ; AVX512-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 32), align 16 -; AVX512-NEXT: [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], +; AVX512-NEXT: [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], splat (i32 -1) ; AVX512-NEXT: store <16 x i32> [[TMP5]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 32), align 16 ; AVX512-NEXT: [[TMP6:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 48), align 16 -; AVX512-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], +; AVX512-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], splat (i32 -1) ; AVX512-NEXT: store <16 x i32> [[TMP7]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 48), align 16 ; AVX512-NEXT: br label [[FOR_BODY5:%.*]] ; AVX512: for.cond3: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll index 252da8d7fa7a72..b49d99f353b822 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll @@ -10,18 +10,18 @@ define i32 @test(ptr nocapture %A, ptr nocapture %B) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B:%.*]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 3) ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i8> [[TMP2]] to <2 x double> ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], +; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], splat (double 1.000000e+00) ; CHECK-NEXT: store <2 x double> [[TMP13]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll index 81845fed1134c3..cfbfd0ebc37bca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -22,9 +22,9 @@ define i32 @bar() local_unnamed_addr { ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <16 x i32> [[TMP5]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i32> [[TMP5]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> [[TMP10]], <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], -; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i32> [[TMP12]], -; CHECK-NEXT: [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], splat (i32 15) +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i32> [[TMP12]], splat (i32 65537) +; CHECK-NEXT: [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], splat (i32 65535) ; CHECK-NEXT: [[TMP15:%.*]] = add <16 x i32> [[TMP14]], [[TMP11]] ; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i32> [[TMP15]], [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP16]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll index e5d7ad138b4def..166c819098c8c1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll @@ -24,7 +24,7 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) { ; CHECK-NEXT: Entry: ; CHECK-NEXT: br i1 poison, label [[LOOP0:%.*]], label [[EXIT:%.*]] ; CHECK: loop0: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY:%.*]] ], [ [[TMP7:%.*]], [[USERBLOCK1:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ splat (float 5.000000e-01), [[ENTRY:%.*]] ], [ [[TMP7:%.*]], [[USERBLOCK1:%.*]] ] ; CHECK-NEXT: br i1 poison, label [[USERBLOCK0:%.*]], label [[BLKX:%.*]] ; CHECK: UserBlock0: ; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x float> [ zeroinitializer, [[LOOP0]] ], [ [[TMP5:%.*]], [[BLKX]] ] @@ -48,7 +48,7 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) { ; CHECK-NEXT: [[TMP9]] = fadd fast <4 x float> [[TMP8]], poison ; CHECK-NEXT: br i1 poison, label [[USERBLOCK1]], label [[LOOP_INNER]] ; CHECK: Exit: -; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP7]], [[USERBLOCK1]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x float> [ splat (float 5.000000e-01), [[ENTRY]] ], [ [[TMP7]], [[USERBLOCK1]] ] ; CHECK-NEXT: [[IDX0:%.*]] = add i64 0, poison ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IDX0]] ; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[GEP0]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll index e1fd8a7ec88afc..58ea4f8da01a42 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll @@ -15,20 +15,20 @@ define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) { ; CHECK-NEXT: [[BF_LOAD_I1345:%.*]] = load i24, ptr [[TMP5]], align 16 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i24> poison, i24 [[BF_LOAD_I1336]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i24> [[TMP6]], i24 [[BF_LOAD_I1345]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = and <2 x i24> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> , <2 x i24> [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = and <2 x i24> [[TMP7]], splat (i24 255) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], splat (i24 24) +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> splat (i24 23), <2 x i24> [[TMP8]] ; CHECK-NEXT: [[TMP23:%.*]] = trunc <2 x i24> [[TMP10]] to <2 x i8> ; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP26]], -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], -; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i8> , <2 x i8> [[TMP23]] +; CHECK-NEXT: [[TMP13:%.*]] = and <2 x i32> [[TMP26]], splat (i32 254) +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP13]], splat (i32 4) +; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP15]], <2 x i8> splat (i8 2), <2 x i8> [[TMP23]] ; CHECK-NEXT: [[TMP14:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP15]], <2 x i8> , <2 x i8> [[TMP25]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <2 x i32> [[TMP14]], splat (i32 32) +; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP17]], <2 x i8> splat (i8 31), <2 x i8> [[TMP25]] ; CHECK-NEXT: [[TMP16:%.*]] = zext <2 x i8> [[TMP18]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <2 x i32> [[TMP16]], -; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP17]], <2 x i8> , <2 x i8> [[TMP18]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq <2 x i32> [[TMP16]], splat (i32 54) +; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP27]], <2 x i8> splat (i8 53), <2 x i8> [[TMP18]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i8> [[TMP21]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = zext i8 [[TMP22]] to i32 ; CHECK-NEXT: store i32 [[TMP19]], ptr [[P1]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll index a316415dcc6b52..b4cbdfa10460f6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll @@ -19,7 +19,7 @@ define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; SSE-NEXT: entry: ; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1) ; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 ; SSE-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 ; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] @@ -35,7 +35,7 @@ define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; AVX-NEXT: entry: ; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1) ; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 ; AVX-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64 ; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] @@ -78,7 +78,7 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; SSE-NEXT: entry: ; SSE-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; SSE-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1) ; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 ; SSE-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64 ; SSE-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] @@ -94,7 +94,7 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) { ; AVX-NEXT: entry: ; AVX-NEXT: [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1 -; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], +; AVX-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1) ; AVX-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0 ; AVX-NEXT: [[TMP4:%.*]] = sext i8 [[TMP3]] to i64 ; AVX-NEXT: [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll b/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll index 4b5b5da21cd21d..35625d70a26671 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll @@ -9,8 +9,8 @@ define void @PR62969(ptr dereferenceable(16) %out, ptr dereferenceable(16) %in) ; CHECK-NEXT: [[IN0:%.*]] = getelementptr inbounds [2 x i64], ptr [[IN:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[OUT0:%.*]] = getelementptr inbounds [2 x i64], ptr [[OUT:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[IN0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP1]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 32) ; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i64> [[TMP2]], [[TMP3]] ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr [[OUT0]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll index 4898111960c0fd..355f5306ee4db1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll @@ -23,7 +23,7 @@ define void @e(ptr %c, i64 %0) { ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i64> poison, i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i64> [[TMP13]], <32 x i64> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i64> [[TMP14]], [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <32 x i64> [[TMP15]], +; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <32 x i64> [[TMP15]], splat (i64 16) ; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP16]]) ; CHECK-NEXT: br i1 [[TMP17]], label %[[FOR_BODY:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index ec8bcc85e7db0c..78bfb8df51aebb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -78,9 +78,9 @@ define i32 @foo2(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 ; CHECK: for.body: ; CHECK-NEXT: [[I_019:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ [[TMP0]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], -; CHECK-NEXT: [[TMP4]] = fadd <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], splat (double 1.000000e+01) +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], splat (double 4.000000e+00) +; CHECK-NEXT: [[TMP4]] = fadd <2 x double> [[TMP3]], splat (double 4.000000e+00) ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_019]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -236,7 +236,7 @@ define float @sort_phi_type(ptr nocapture readonly %A) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ splat (float 1.000000e+01), [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll index 3b42ff9d85604d..038409c56789b4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll @@ -15,7 +15,7 @@ define void @Rf_GReset() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @d, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP1]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq ptr inttoptr (i64 115 to ptr), @Rf_gpptr ; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[IF_END7:%.*]] ; CHECK: if.then: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll index c61d1fbc394798..bcf36c623c27b8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll @@ -9,7 +9,7 @@ define void @powof2div_uniform(ptr noalias nocapture %a, ptr noalias nocapture r ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll index d8e646b544179f..5e8119f4207fb5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll @@ -9,7 +9,7 @@ define void @powof2mul_uniform(ptr noalias nocapture %a, ptr noalias nocapture r ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], splat (i32 2) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -52,7 +52,7 @@ define void @negpowof2mul_uniform(ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], splat (i32 -2) ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -182,20 +182,20 @@ define void @PR51436(ptr nocapture %a) { ; SSE-NEXT: [[GEP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 4 ; SSE-NEXT: [[GEP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 6 ; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 8 -; SSE-NEXT: [[TMP2:%.*]] = mul <2 x i64> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], +; SSE-NEXT: [[TMP2:%.*]] = mul <2 x i64> [[TMP1]], splat (i64 -17592186044416) +; SSE-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], splat (i64 -17592186044416) ; SSE-NEXT: store <2 x i64> [[TMP3]], ptr [[A]], align 8 ; SSE-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[GEP2]], align 8 -; SSE-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP6]], -; SSE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP7]], +; SSE-NEXT: [[TMP4:%.*]] = mul <2 x i64> [[TMP6]], splat (i64 -17592186044416) +; SSE-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], splat (i64 -17592186044416) ; SSE-NEXT: store <2 x i64> [[TMP8]], ptr [[GEP2]], align 8 ; SSE-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr [[GEP4]], align 8 -; SSE-NEXT: [[TMP12:%.*]] = mul <2 x i64> [[TMP11]], -; SSE-NEXT: [[TMP13:%.*]] = add <2 x i64> [[TMP12]], +; SSE-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP11]], splat (i64 -17592186044416) +; SSE-NEXT: [[TMP13:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -17592186044416) ; SSE-NEXT: store <2 x i64> [[TMP13]], ptr [[GEP4]], align 8 ; SSE-NEXT: [[TMP16:%.*]] = load <2 x i64>, ptr [[GEP6]], align 8 -; SSE-NEXT: [[TMP17:%.*]] = mul <2 x i64> [[TMP16]], -; SSE-NEXT: [[TMP18:%.*]] = add <2 x i64> [[TMP17]], +; SSE-NEXT: [[TMP10:%.*]] = mul <2 x i64> [[TMP16]], splat (i64 -17592186044416) +; SSE-NEXT: [[TMP18:%.*]] = add <2 x i64> [[TMP10]], splat (i64 -17592186044416) ; SSE-NEXT: store <2 x i64> [[TMP18]], ptr [[GEP6]], align 8 ; SSE-NEXT: ret void ; @@ -203,12 +203,12 @@ define void @PR51436(ptr nocapture %a) { ; AVX-NEXT: entry: ; AVX-NEXT: [[GEP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 4 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[A]], align 8 -; AVX-NEXT: [[TMP2:%.*]] = mul <4 x i64> [[TMP1]], -; AVX-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], +; AVX-NEXT: [[TMP2:%.*]] = mul <4 x i64> [[TMP1]], splat (i64 -17592186044416) +; AVX-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], splat (i64 -17592186044416) ; AVX-NEXT: store <4 x i64> [[TMP3]], ptr [[A]], align 8 ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr [[GEP4]], align 8 -; AVX-NEXT: [[TMP7:%.*]] = mul <4 x i64> [[TMP6]], -; AVX-NEXT: [[TMP8:%.*]] = add <4 x i64> [[TMP7]], +; AVX-NEXT: [[TMP4:%.*]] = mul <4 x i64> [[TMP6]], splat (i64 -17592186044416) +; AVX-NEXT: [[TMP8:%.*]] = add <4 x i64> [[TMP4]], splat (i64 -17592186044416) ; AVX-NEXT: store <4 x i64> [[TMP8]], ptr [[GEP4]], align 8 ; AVX-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll index 8f96968b978a57..dc4e28fe261784 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll @@ -11,16 +11,16 @@ define void @_Z3fooPml(ptr nocapture %a, i64 %i) { ; CHECK-LABEL: @_Z3fooPml( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 4) ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[A]], align 8 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @total, align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP3]], [[TMP5]] ; CHECK-NEXT: store i64 [[ADD]], ptr @total, align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[A]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = lshr <2 x i64> [[TMP5]], -; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr [[A]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP5]], splat (i64 4) +; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[A]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @total, align 8 ; CHECK-NEXT: [[ADD9:%.*]] = add i64 [[TMP7]], [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll index cb24a9cefffa2e..9fbe0a54b0688d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll @@ -63,16 +63,16 @@ define void @pr35497() local_unnamed_addr #0 { ; SSE-NEXT: store i64 [[ADD]], ptr undef, align 1 ; SSE-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 4 ; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], -; SSE-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], +; SSE-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 2) +; SSE-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 20) ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> ; SSE-NEXT: [[TMP5:%.*]] = add nuw nsw <2 x i64> [[TMP4]], zeroinitializer ; SSE-NEXT: store <2 x i64> [[TMP5]], ptr undef, align 1 ; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <2 x i32> ; SSE-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1 -; SSE-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], -; SSE-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]], -; SSE-NEXT: [[TMP10:%.*]] = lshr <2 x i64> [[TMP5]], +; SSE-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], splat (i64 2) +; SSE-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]], splat (i64 20) +; SSE-NEXT: [[TMP10:%.*]] = lshr <2 x i64> [[TMP5]], splat (i64 6) ; SSE-NEXT: [[TMP11:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP10]] ; SSE-NEXT: store <2 x i64> [[TMP11]], ptr [[ARRAYIDX2_2]], align 1 ; SSE-NEXT: ret void @@ -84,15 +84,15 @@ define void @pr35497() local_unnamed_addr #0 { ; AVX-NEXT: store i64 [[ADD]], ptr undef, align 1 ; AVX-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 4 ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 1 -; AVX-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], -; AVX-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], +; AVX-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 2) +; AVX-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 20) ; AVX-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer ; AVX-NEXT: store <2 x i64> [[TMP4]], ptr undef, align 1 ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[ADD]], i32 1 -; AVX-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], -; AVX-NEXT: [[TMP8:%.*]] = and <2 x i64> [[TMP7]], -; AVX-NEXT: [[TMP9:%.*]] = lshr <2 x i64> [[TMP4]], +; AVX-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], splat (i64 2) +; AVX-NEXT: [[TMP8:%.*]] = and <2 x i64> [[TMP7]], splat (i64 20) +; AVX-NEXT: [[TMP9:%.*]] = lshr <2 x i64> [[TMP4]], splat (i64 6) ; AVX-NEXT: [[TMP10:%.*]] = add nuw nsw <2 x i64> [[TMP8]], [[TMP9]] ; AVX-NEXT: store <2 x i64> [[TMP10]], ptr [[ARRAYIDX2_2]], align 1 ; AVX-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll index dd51105e1a6ca2..2588012847d096 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll @@ -58,7 +58,7 @@ define void @test2(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture %p) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[B:%.*]], i64 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i64 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[D:%.*]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 4, !tbaa [[TBAA0]] ; CHECK-NEXT: ret void ; @@ -83,7 +83,7 @@ define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, ptr nocapture %4) { ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP1:%.*]], i64 1 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP2:%.*]], i64 2 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3:%.*]], i64 3 -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP4:%.*]], align 16, !tbaa [[TBAA0]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll index 264a41daef55b8..e8703f2081c66d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll @@ -7,7 +7,7 @@ define <2 x float> @foo(ptr %A) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 2.000000e+00) ; CHECK-NEXT: ret <2 x float> [[TMP2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll index 6960e544b59a8a..a77f2788200dad 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll @@ -7,7 +7,7 @@ define <2 x float> @foo(ptr %A) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 2.000000e+00) ; CHECK-NEXT: ret <2 x float> [[TMP2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll index 29021150ccd2e3..bae127527bbbeb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll @@ -11,8 +11,8 @@ define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], splat (i32 15) +; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> splat (i32 255)) ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] ; CHECK-NEXT: ret void ; @@ -53,8 +53,8 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP7]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> ) +; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], splat (i32 15) +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> splat (i32 255)) ; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw <4 x i32> [[TMP10]] to <4 x i8> ; CHECK-NEXT: store <4 x i8> [[TMP11]], ptr [[TMP0]], align 1, !tbaa [[TBAA4]] ; CHECK-NEXT: ret void @@ -104,11 +104,11 @@ define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], splat (i64 15) ; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], -; CHECK-NEXT: [[TMP12:%.*]] = and <4 x i64> [[TMP9]], -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], splat (i32 255) +; CHECK-NEXT: [[TMP12:%.*]] = and <4 x i64> [[TMP9]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> splat (i64 255) ; CHECK-NEXT: store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll index 19cbce0767c925..6001c659a37b75 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll @@ -321,7 +321,7 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], ; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -330,7 +330,7 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], ; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void @@ -478,7 +478,7 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -487,7 +487,7 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void @@ -691,8 +691,8 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> ; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -702,8 +702,8 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> ; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll index 9ac4208c632851..b1942bfe073f2c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll @@ -321,7 +321,7 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], ; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -330,7 +330,7 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], ; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void @@ -478,7 +478,7 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 ; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -487,7 +487,7 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 ; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> -; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> , <8 x i32> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], ; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void @@ -691,8 +691,8 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> ; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] +; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] ; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512F-NEXT: ret void @@ -702,8 +702,8 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea ; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer ; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> ; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> -; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] ; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] ; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll index 92a4095c7c57a8..4f8661f6bac078 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll @@ -91,7 +91,7 @@ define { i64, i64 } @compute_min(ptr nocapture noundef nonnull readonly align 2 ; AVX-NEXT: [[TMP17:%.*]] = shl nuw <2 x i64> [[TMP16]], ; AVX-NEXT: [[TMP18:%.*]] = or <2 x i64> [[TMP15]], [[TMP17]] ; AVX-NEXT: [[TMP19:%.*]] = zext <2 x i16> [[TMP7]] to <2 x i64> -; AVX-NEXT: [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], +; AVX-NEXT: [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], splat (i64 16) ; AVX-NEXT: [[TMP21:%.*]] = or <2 x i64> [[TMP18]], [[TMP20]] ; AVX-NEXT: [[TMP22:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i64> ; AVX-NEXT: [[TMP23:%.*]] = or <2 x i64> [[TMP21]], [[TMP22]] @@ -123,7 +123,7 @@ define { i64, i64 } @compute_min(ptr nocapture noundef nonnull readonly align 2 ; AVX2-NEXT: [[TMP17:%.*]] = shl nuw <2 x i64> [[TMP16]], ; AVX2-NEXT: [[TMP18:%.*]] = or <2 x i64> [[TMP15]], [[TMP17]] ; AVX2-NEXT: [[TMP19:%.*]] = zext <2 x i16> [[TMP7]] to <2 x i64> -; AVX2-NEXT: [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], +; AVX2-NEXT: [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], splat (i64 16) ; AVX2-NEXT: [[TMP21:%.*]] = or <2 x i64> [[TMP18]], [[TMP20]] ; AVX2-NEXT: [[TMP22:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i64> ; AVX2-NEXT: [[TMP23:%.*]] = or <2 x i64> [[TMP21]], [[TMP22]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll index b1bc5d14550ef7..26258402b9781d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll @@ -4,7 +4,7 @@ define void @foo(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[T1:%.*]], align 1, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <8 x i8> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <8 x i8> [[TMP2]], splat (i8 64) ; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i8> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP2]], <8 x i8> [[TMP4]] ; CHECK-NEXT: store <8 x i8> [[TMP5]], ptr [[T0:%.*]], align 1, !tbaa [[TBAA0]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll index 6f70aa5f4f6afb..cb02f4d10923c8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll @@ -11,7 +11,7 @@ target triple = "x86_64-unknown-unknown" define void @exact(ptr %x) { ; CHECK-LABEL: @exact( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -40,7 +40,7 @@ define void @exact(ptr %x) { define void @not_exact(ptr %x) { ; CHECK-LABEL: @not_exact( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -69,7 +69,7 @@ define void @not_exact(ptr %x) { define void @nsw(ptr %x) { ; CHECK-LABEL: @nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -98,7 +98,7 @@ define void @nsw(ptr %x) { define void @not_nsw(ptr %x) { ; CHECK-LABEL: @not_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -127,7 +127,7 @@ define void @not_nsw(ptr %x) { define void @nuw(ptr %x) { ; CHECK-LABEL: @nuw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -156,7 +156,7 @@ define void @nuw(ptr %x) { define void @not_nuw(ptr %x) { ; CHECK-LABEL: @not_nuw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -185,7 +185,7 @@ define void @not_nuw(ptr %x) { define void @not_nsw_but_nuw(ptr %x) { ; CHECK-LABEL: @not_nsw_but_nuw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -214,7 +214,7 @@ define void @not_nsw_but_nuw(ptr %x) { define void @nnan(ptr %x) { ; CHECK-LABEL: @nnan( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], splat (float 1.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -243,7 +243,7 @@ define void @nnan(ptr %x) { define void @not_nnan(ptr %x) { ; CHECK-LABEL: @not_nnan( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], splat (float 1.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -272,7 +272,7 @@ define void @not_nnan(ptr %x) { define void @only_fast(ptr %x) { ; CHECK-LABEL: @only_fast( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], splat (float 1.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -301,7 +301,7 @@ define void @only_fast(ptr %x) { define void @only_arcp(ptr %x) { ; CHECK-LABEL: @only_arcp( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], splat (float 1.000000e+00) ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[X]], align 4 ; CHECK-NEXT: ret void ; @@ -330,8 +330,8 @@ define void @only_arcp(ptr %x) { define void @addsub_all_nsw(ptr %x) { ; CHECK-LABEL: @addsub_all_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4 ; CHECK-NEXT: ret void @@ -361,8 +361,8 @@ define void @addsub_all_nsw(ptr %x) { define void @addsub_some_nsw(ptr %x) { ; CHECK-LABEL: @addsub_some_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4 ; CHECK-NEXT: ret void @@ -392,8 +392,8 @@ define void @addsub_some_nsw(ptr %x) { define void @addsub_no_nsw(ptr %x) { ; CHECK-LABEL: @addsub_no_nsw( ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4 ; CHECK-NEXT: ret void @@ -424,7 +424,7 @@ define void @fcmp_fast(ptr %x) #1 { ; CHECK-LABEL: @fcmp_fast( ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> , [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> splat (double -0.000000e+00), [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]] ; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[X]], align 8 ; CHECK-NEXT: ret void @@ -482,7 +482,7 @@ define void @fcmp_no_fast(ptr %x) #1 { ; CHECK-LABEL: @fcmp_no_fast( ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> , [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]] ; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[X]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll index 4c8d6b734ddc15..789ac9ef23b311 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll @@ -24,7 +24,7 @@ define i1 @test1(i32 %x, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], splat (i32 1) ; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[D]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 6200e3ae43fc98..0771fabef3e028 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -274,8 +274,8 @@ define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) { define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42) +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false @@ -352,9 +352,9 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp_partial( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], splat (i32 42) ; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 @@ -481,7 +481,7 @@ define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) { define i1 @logical_and_icmp_extra_args(<4 x i32> %x, i1 %c0, i1 %c1, i1 %c2) { ; CHECK-LABEL: @logical_and_icmp_extra_args( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 17) ; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C0:%.*]], i1 false diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll index ce9158d8bf2eea..35595b79cd708b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -22,7 +22,7 @@ define i32 @reduce(ptr nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[I_015]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 [[MUL]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[TMP0]], splat (double 7.000000e+00) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[TMP2]], [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll index 5dcd5d3190ad0c..29a8a229980e92 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll @@ -52,7 +52,7 @@ define double @foo(ptr nocapture %D) { define i1 @two_wide_fcmp_reduction(<2 x double> %a0) { ; CHECK-LABEL: @two_wide_fcmp_reduction( -; CHECK-NEXT: [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], +; CHECK-NEXT: [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], splat (double 1.000000e+00) ; CHECK-NEXT: [[B:%.*]] = extractelement <2 x i1> [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = extractelement <2 x i1> [[A]], i32 1 ; CHECK-NEXT: [[D:%.*]] = and i1 [[B]], [[C]] @@ -67,7 +67,7 @@ define i1 @two_wide_fcmp_reduction(<2 x double> %a0) { define double @fadd_reduction(<2 x double> %a0) { ; CHECK-LABEL: @fadd_reduction( -; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x double> [[A0:%.*]], +; CHECK-NEXT: [[A:%.*]] = fadd fast <2 x double> [[A0:%.*]], splat (double 1.000000e+00) ; CHECK-NEXT: [[B:%.*]] = extractelement <2 x double> [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = extractelement <2 x double> [[A]], i32 1 ; CHECK-NEXT: [[D:%.*]] = fadd fast double [[B]], [[C]] @@ -102,7 +102,7 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) { ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 ; CHECK-NEXT: [[NOT_OR_COND9:%.*]] = or i1 [[TMP11]], [[TMP12]] @@ -144,7 +144,7 @@ define i1 @fcmp_lt(double %a, double %b, double %c) { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fdiv <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], +; CHECK-NEXT: [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], splat (double 0x3EB0C6F7A0B5ED8D) ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 ; CHECK-NEXT: [[NOT_OR_COND:%.*]] = or i1 [[TMP10]], [[TMP11]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll index e700e7fa3a85c3..1922e935cee4be 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll @@ -14,7 +14,7 @@ define void @test(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <8 x ptr> [[TMP1]], <8 x i64> ; CHECK-NEXT: [[GEP2_0:%.*]] = getelementptr inbounds double, ptr [[ARG1:%.*]], i64 16 -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x double> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> splat (i1 true), <8 x double> poison) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x double>, ptr [[GEP2_0]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <8 x double> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = load <8 x double>, ptr [[ARG1]], align 8 @@ -24,7 +24,7 @@ define void @test(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr ; CHECK-NEXT: [[I142:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0 ; CHECK-NEXT: [[I143:%.*]] = insertelement <2 x double> [[I142]], double [[TMP9]], i64 1 ; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds double, ptr [[ARG2:%.*]], <2 x i64> -; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I143]], <2 x ptr> [[P]], i32 8, <2 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I143]], <2 x ptr> [[P]], i32 8, <2 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll index 6fbf01013b6d29..f0272d591f0c33 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll @@ -11,7 +11,7 @@ define void @rdx_feeds_single_insert(<2 x double> %v, ptr nocapture readonly %ar ; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> [[TMP1]]) ; CHECK-NEXT: [[I:%.*]] = insertelement <2 x double> [[V:%.*]], double [[TMP2]], i64 1 ; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds double, ptr [[ARG2:%.*]], <2 x i64> -; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I]], <2 x ptr> [[P]], i32 8, <2 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I]], <2 x ptr> [[P]], i32 8, <2 x i1> splat (i1 true)) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll index 59b0352a825929..0807a1bd4cdea4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll @@ -8,11 +8,11 @@ define i32 @test(ptr noalias %p, ptr noalias %addr) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, <8 x ptr> [[TMP5]], <8 x i32> [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]]) ; CHECK-NEXT: ret i32 [[TMP8]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll index cd7ad210ca5678..c7c67d31f9ded6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll @@ -7,7 +7,7 @@ define void @test(ptr noalias %0, ptr %p) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x ptr> [[TMP2]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, <8 x ptr> [[TMP3]], <8 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison) +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison) ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP8]], <16 x float> , <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 042fb88297e2ba..cb955ff91ed810 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -29,7 +29,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP12]], <4 x i8> [[TMP13]], i64 4) ; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8> ; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v2i8(<16 x i8> [[TMP14]], <2 x i8> [[TMP15]], i64 2) -; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], +; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], splat (i8 1) ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr undef, align 1 ; CHECK-NEXT: ret void ; CHECK: if.end50.i: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll index 6bbc33bac80f9d..79ce74bd21dbcf 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll @@ -14,7 +14,7 @@ define i32 @foo(i32 %0, ptr %1, ptr %2) { ; CHECK-NEXT: br label [[T37:%.*]] ; CHECK: t37: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[TMP3:%.*]] ], [ [[T89:%.*]], [[T37]] ] -; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <2 x float> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[TMP6]] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 2, i64 0 ; CHECK-NEXT: store <4 x float> [[SHUFFLE]], ptr [[T21]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll index 14033f28b93163..d1b177e2b6dea5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll @@ -5,7 +5,7 @@ define void @test(ptr %p, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], splat (i32 1) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[E:%.*]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[F:%.*]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder-non-empty.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder-non-empty.ll index 3bece6b7cf9a7e..9d63c0f1aa59e3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder-non-empty.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder-non-empty.ll @@ -6,7 +6,7 @@ define double @test01() { ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr null, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <2 x ptr> zeroinitializer, <2 x i32> [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP2]], i32 8, <2 x i1> , <2 x double> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP2]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> , <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll index f10e0a84081669..26e62d36fb6a82 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll @@ -11,12 +11,12 @@ define i32 @slp_schedule_bundle() local_unnamed_addr #0 { ; CHECK-LABEL: @slp_schedule_bundle( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], splat (i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 1) ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @a, align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr ([1 x i32], ptr @b, i64 4, i64 0), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 31) +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr getelementptr ([1 x i32], ptr @a, i64 4, i64 0), align 4 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll index a009b1eaf65f13..242d66fda569a6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -8,7 +8,7 @@ define void @wombat(ptr %ptr, ptr %ptr1) { ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], splat (i32 -1) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP1]], undef ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> undef, <4 x i32> [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll index 89c64d64b9c844..1d101318bdf876 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll @@ -14,7 +14,7 @@ define i32 @rollable(ptr noalias nocapture %in, ptr noalias nocapture %out, i64 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], splat (i32 7) ; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP8]] = add i64 [[I_019]], 1 @@ -76,11 +76,11 @@ define i32 @unrollable(ptr %in, ptr %out, i64 %n) nounwind ssp uwtable { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i32> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = mul <2 x i32> [[TMP7]], splat (i32 7) ; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP11:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP11]], +; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP11]], splat (i32 7) ; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i32> [[TMP12]], ; CHECK-NEXT: store <2 x i32> [[TMP9]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[BARRIER:%.*]] = call i32 @goo(i32 0) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll index a0e17446b6abb1..917baaa86460c4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll @@ -7,7 +7,7 @@ define void @foo(ptr %ptr) { ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 328 ; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 334 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[GEP0]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP1]], splat (i16 -1) ; CHECK-NEXT: [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double> ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i16>, ptr [[GEP3]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll index ba83ff096c9acb..8fe7d15b69cb18 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2_unord_geps.ll @@ -8,14 +8,14 @@ define void @test(ptr noalias %p, ptr noalias %addr, ptr noalias %s) { ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE1]], <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0 ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> , <8 x i32> poison) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison) +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <8 x ptr> [[SHUFFLE2]], <8 x i32> [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP8]], i32 4, <8 x i1> , <8 x i32> poison) +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP8]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison) ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <8 x i32> [[TMP9]], [[TMP6]] ; CHECK-NEXT: store <8 x i32> [[TMP10]], ptr [[S:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll index f4a0fc84cee8e6..88a0ba59fa633c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll @@ -6,7 +6,7 @@ define void @stackrestore1(ptr %out) { ; CHECK-LABEL: @stackrestore1( ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() ; CHECK-NEXT: [[LOCAL_ALLOCA:%.*]] = alloca [16 x i8], align 4 -; CHECK-NEXT: store <4 x float> , ptr [[LOCAL_ALLOCA]], align 4 +; CHECK-NEXT: store <4 x float> splat (float 0x3FF3333340000000), ptr [[LOCAL_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[LOCAL_ALLOCA]], align 4 ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll index e93c5244dfbe2c..977942aa06c51a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll @@ -8,7 +8,7 @@ define void @basecase(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: @basecase( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8 ; CHECK-NEXT: store ptr null, ptr [[A]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1) ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -34,7 +34,7 @@ define void @allocas(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: [[V2:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 @@ -126,7 +126,7 @@ define void @stacksave2(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: [[V2:%.*]] = alloca inalloca i8, align 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]] @@ -161,7 +161,7 @@ define void @stacksave3(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -188,7 +188,7 @@ define void @stacksave3(ptr %a, ptr %b, ptr %c) { define void @stacksave4(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: @stacksave4( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]] @@ -218,7 +218,7 @@ define void @stacksave4(ptr %a, ptr %b, ptr %c) { define void @stacksave5(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: @stacksave5( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[STACK:%.*]] = call ptr @llvm.stacksave.p0() ; CHECK-NEXT: [[X:%.*]] = alloca inalloca i8, align 1 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]] @@ -258,7 +258,7 @@ define void @stackrestore1(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: store i8 0, ptr [[V2]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1) ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll index 378403ac304683..02f8c552fec887 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll @@ -26,7 +26,7 @@ define void @_Z8DistanceIlLi5EEvPfiPmS0_(ptr %p1, i32 %p2, ptr %p3, ptr %p4) { ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]] ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], splat (i64 5) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8 ; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 @@ -145,7 +145,7 @@ define void @store15(ptr %p1, i32 %p2, ptr %p3, ptr %p4) { ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], splat (i64 5) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8 ; CHECK-NEXT: ret void ; @@ -223,7 +223,7 @@ define void @store16(ptr %p1, i32 %p2, ptr %p3, ptr %p4) { ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8 ; CHECK-NEXT: store i64 5, ptr [[ARRAYIDX9]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], splat (i64 5) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[P3]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll index 2f126df81f2305..83e1bef8fa066a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll @@ -26,7 +26,7 @@ define i1 @test(i64 %v1, ptr %v2, i32 %v3, i1 %v4) { ; CHECK-NEXT: [[CONV_I_I1743_3:%.*]] = fptoui float [[TMP17]] to i32 ; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i32 [[CONV_I_I1743_3]], 0 ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x float> [[TMP16]] to <4 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP19]], +; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP19]], splat (i32 1333788672) ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3 ; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP21]], i1 [[TMP18]], i1 false ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll index 69ae26b9f25856..912b148c310068 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll @@ -218,7 +218,7 @@ define void @supernode_scheduling_cross_block(ptr %Aarray, ptr %Barray, ptr %Sar ; ENABLED-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8 ; ENABLED-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 ; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1 -; ENABLED-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], +; ENABLED-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], splat (double 2.000000e+00) ; ENABLED-NEXT: br label [[BB:%.*]] ; ENABLED: bb: ; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll index 71b957ac7bbd89..dbd4adea4a4ece 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll @@ -15,7 +15,7 @@ define void @foo(ptr nocapture %x) #0 { ; CHECK-NEXT: [[BAD:%.*]] = fadd float [[BAD]], 0.000000e+00 ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, [[BB1:%.*]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, [[BB1:%.*]] ], [ splat (i32 2), [[ENTRY:%.*]] ] ; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[X:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll index 96d4b84e036918..6e2a43ac5f9f10 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll @@ -166,7 +166,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 ; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4 -; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], +; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01) ; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; NON-POW2-NEXT: ret void ; @@ -177,7 +177,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 ; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01 ; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4 -; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], +; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01) ; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4 ; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2 ; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll index 373feb4a658f0b..0ed12760b563fa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -52,7 +52,7 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll index da935173b20d11..f47373747e578a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -52,7 +52,7 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll index e52b29a7f681c7..d650a972ad8ca2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll @@ -47,7 +47,7 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll index e30cb76d53d928..869a9d1aee80e3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll @@ -225,8 +225,8 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], splat (i32 -1) +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], splat (i32 -1) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST]], align 4 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3 @@ -599,8 +599,8 @@ define void @addsub1f(ptr noalias %dst, ptr noalias %src) { ; CHECK-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2 ; CHECK-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x float> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[TMP0]], splat (float -1.000000e+00) +; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x float> [[TMP0]], splat (float -1.000000e+00) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[DST]], align 4 ; CHECK-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll index f29d4883049194..79bef36515a88a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll @@ -24,7 +24,7 @@ define double @root_selection(double %a, double %b, double %c, double %d) local_ ; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast <2 x double> [[TMP9]], +; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast <2 x double> [[TMP9]], splat (double 1.400000e+00) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 1 ; CHECK-NEXT: [[I07:%.*]] = fadd fast double undef, [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll index 51b635837d3b59..da1b87b28a3045 100644 --- a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll +++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll @@ -9,7 +9,7 @@ define i32 @test(i32 %n) { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], splat (i64 273837369) ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true) ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll index 287b623f63690a..ad4daeab003f5f 100644 --- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll +++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll @@ -153,7 +153,7 @@ define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d, ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[INP]], 5 ; CHECK-NEXT: [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0 ; CHECK-NEXT: [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1 -; CHECK-NEXT: [[DIV0:%.*]] = sdiv <2 x i8> , [[V]] +; CHECK-NEXT: [[DIV0:%.*]] = sdiv <2 x i8> splat (i8 -128), [[V]] ; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[DIV0]], i8 [[TMP1]], i64 0 ; CHECK-NEXT: ret <2 x i8> [[R]] diff --git a/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll index f0d5629cfc22e9..3cf359fd47351a 100644 --- a/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll @@ -12,8 +12,8 @@ define void @test(ptr %0, i8 %1, i1 %cmp12.i) { ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[PRE:%.*]] ; CHECK: pre: -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP5]], <8 x i8> ) -; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i8> [[TMP8]], +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP5]], <8 x i8> splat (i8 1)) +; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i8> [[TMP6]], splat (i8 1) ; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]] ; CHECK-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1 ; CHECK-NEXT: br label [[PRE]] diff --git a/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll b/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll index 94aa8de5fdb72c..118507c790bfc8 100644 --- a/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll +++ b/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll @@ -14,7 +14,7 @@ define void @test(ptr %top) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i8> , i8 [[TMP4]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i8> [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i8> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i8> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i8> [[TMP7]], splat (i8 2) ; CHECK-NEXT: br label [[FOR_COND_I:%.*]] ; CHECK: for.cond.i: ; CHECK-NEXT: store <4 x i8> [[TMP8]], ptr null, align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll b/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll index 6cd44c29788274..eba97b715ea588 100644 --- a/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll +++ b/llvm/test/Transforms/SLPVectorizer/freeze-signedness-missed.ll @@ -6,7 +6,7 @@ define i32 @test(i1 %.b, i8 %conv18, i32 %k.promoted61) { ; CHECK-SAME: i1 [[DOTB:%.*]], i8 [[CONV18:%.*]], i32 [[K_PROMOTED61:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> poison, i1 [[DOTB]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i8> ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i8> diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll index 5f02b0009367c7..74e52da6880400 100644 --- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll @@ -469,7 +469,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], splat (double 1.000000e+00) ; CHECK-NEXT: ret <4 x double> [[TMP6]] ; %t0 = fadd double %w , 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll index 63d55f7b1deb58..8e3a941932c976 100644 --- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll @@ -503,7 +503,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], splat (double 1.000000e+00) ; CHECK-NEXT: ret <4 x double> [[TMP6]] ; %t0 = fadd double %w , 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll index f53e22d89d5169..50cc97a529f5fb 100644 --- a/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll @@ -23,7 +23,7 @@ define dso_local void @j() local_unnamed_addr { ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i32> [[TMP5]], [[TMP3]] ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i32> [[TMP6]] to <2 x float> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP7]], +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP7]], splat (float 1.000000e+01) ; CHECK-NEXT: [[TMP9:%.*]] = fsub <2 x float> , [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP10]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll index 408dda2211b8d1..62417268bf3d0a 100644 --- a/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll +++ b/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll @@ -10,8 +10,8 @@ define i64 @src(i32 %a) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], splat (i64 4294967297) +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll index 2037e0d67d2f89..4478eab7b827ab 100644 --- a/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll +++ b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll @@ -10,7 +10,7 @@ define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) { ; CHECK: bb1: ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP2]], splat (i32 16) ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i16> ; CHECK-NEXT: br label [[BB3:%.*]] @@ -22,8 +22,8 @@ define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) { ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL3_NOT]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]] -; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[TMP12]], -; CHECK-NEXT: [[TMP14:%.*]] = ashr <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[TMP12]], splat (i32 16) +; CHECK-NEXT: [[TMP14:%.*]] = ashr <4 x i32> [[TMP13]], splat (i32 16) ; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16> ; CHECK-NEXT: br i1 true, label [[BB3]], label [[BB2]] ; CHECK: bb3: diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 4074b8654362e0..3ef0de177b478b 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -13,7 +13,7 @@ define i64 @test(ptr %p) { ; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> ; RISCV-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) ; RISCV-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) -; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], +; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], splat (i64 42) ; RISCV-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) ; RISCV-NEXT: ret i64 [[TMP6]] ; @@ -21,7 +21,7 @@ define i64 @test(ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], splat (i64 42) ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]]) ; CHECK-NEXT: ret i64 [[TMP3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll index 1b37c0a77e94e0..c1efc85da554d4 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll @@ -27,7 +27,7 @@ define i32 @test(ptr nocapture readonly %p) { ; CHECK: for.body: ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], splat (i32 42) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP3]], [[SUM]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] diff --git a/llvm/test/Transforms/SLPVectorizer/reudction-or-non-poisoned.ll b/llvm/test/Transforms/SLPVectorizer/reudction-or-non-poisoned.ll index 0359c8de8c96c8..48a46662fef085 100644 --- a/llvm/test/Transforms/SLPVectorizer/reudction-or-non-poisoned.ll +++ b/llvm/test/Transforms/SLPVectorizer/reudction-or-non-poisoned.ll @@ -8,7 +8,7 @@ define i1 @test(i32 %x, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[B]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[C]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) ; CHECK-NEXT: ret i1 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll index 3e00550a885215..b37c4c97a5d137 100644 --- a/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll @@ -22,12 +22,12 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) { ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> , <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> +; CHECK-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> , <2 x double> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer ; CHECK-NEXT: store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll index 965bfc7074c638..4e31095a1d49a5 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll @@ -18,10 +18,10 @@ define void @e() { ; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i32> [[DOTSPLAT]], ; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[DOTSPLAT]], ; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <16 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <16 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <16 x i32> [[INDUCTION]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <16 x i32> [[TMP1]], splat (i32 2) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <16 x i32> [[TMP2]], splat (i32 2) +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i32> [[TMP3]], splat (i32 2) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <16 x i32> [[INDUCTION]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <16 x i32> [[DOTSPLAT]], ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i1> [[TMP9]], [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll index 2f0bd4a8f1315c..179c5b2f7f91cd 100644 --- a/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll +++ b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll @@ -9,14 +9,14 @@ define void @foo(ptr %this, ptr %p, i32 %add7) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[ADD7:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: switch i32 undef, label [[SW_EPILOG:%.*]] [ ; CHECK-NEXT: i32 0, label [[SW_BB:%.*]] ; CHECK-NEXT: i32 2, label [[SW_BB]] ; CHECK-NEXT: ] ; CHECK: sw.bb: -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1) ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[THIS:%.*]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: br label [[SW_EPILOG]] diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index 6a1a23728d3c70..5326b9802ec6de 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -240,7 +240,7 @@ define void @slice_store_v2i8_1(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6 ; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <2 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 2 ; CHECK-NEXT: store <2 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_SROA_0]], align 4 -; CHECK-NEXT: store <2 x i8> bitcast (<1 x i16> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4 +; CHECK-NEXT: store <2 x i8> bitcast (<1 x i16> splat (i16 123) to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4 ; CHECK-NEXT: [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_2_SROA_0]], align 4 ; CHECK-NEXT: store <2 x i8> [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4]], ptr [[DST_2]], align 2 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 6, i1 true) diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll index fe14f2ff17f838..51425c6305fe18 100644 --- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll +++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll @@ -66,7 +66,7 @@ define <2 x i64> @scalarize_v2i64_ones_mask(ptr %p, <2 x i64> %passthru) { ; CHECK-LE-NEXT: ret <2 x i64> [[TMP1]] ; ; CHECK-LE-SVE-LABEL: @scalarize_v2i64_ones_mask( -; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> , <2 x i64> [[PASSTHRU:%.*]]) +; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> splat (i1 true), <2 x i64> [[PASSTHRU:%.*]]) ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]] ; ; CHECK-BE-LABEL: @scalarize_v2i64_ones_mask( diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll index 2f73227f056707..0acc551f81f64d 100644 --- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll +++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll @@ -62,7 +62,7 @@ define void @scalarize_v2i64_ones_mask(ptr %p, <2 x i64> %data) { ; CHECK-LE-NEXT: ret void ; ; CHECK-SVE-LE-LABEL: @scalarize_v2i64_ones_mask( -; CHECK-SVE-LE-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[DATA:%.*]], ptr [[P:%.*]], i32 8, <2 x i1> ) +; CHECK-SVE-LE-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[DATA:%.*]], ptr [[P:%.*]], i32 8, <2 x i1> splat (i1 true)) ; CHECK-SVE-LE-NEXT: ret void ; ; CHECK-BE-LABEL: @scalarize_v2i64_ones_mask( diff --git a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll index fd50b1bcec295b..3b6f5ccf3d3ea0 100644 --- a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll +++ b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll @@ -47,7 +47,7 @@ define void @f2() { ; CHECK: for.end: ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[E_SROA_3_2:%.*]] = phi <2 x i64> [ , [[FOR_END]] ], [ [[E_SROA_3_2]], [[FOR_BODY2:%.*]] ] +; CHECK-NEXT: [[E_SROA_3_2:%.*]] = phi <2 x i64> [ splat (i64 1), [[FOR_END]] ], [ [[E_SROA_3_2]], [[FOR_BODY2:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 6, [[FOR_END]] ], [ [[TMP0]], [[FOR_BODY2]] ] ; CHECK-NEXT: br i1 undef, label [[FOR_BODY2]], label [[FOR_COND1_FOR_END7_CRIT_EDGE:%.*]] ; CHECK: for.cond1.for.end7_crit_edge: diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll index bdfac4f2b30cfd..c86fa349200c54 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll @@ -82,7 +82,7 @@ define i32 @basic_veccond(i32 %N, <2 x i1> %cond, <2 x i32> %select_input) { ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COND1:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[SELECT_INPUT]], <2 x i32> +; CHECK-NEXT: [[COND1:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[SELECT_INPUT]], <2 x i32> splat (i32 42) ; CHECK-NEXT: [[VREDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[COND1]]) ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[VREDUCE]], [[RES]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll index 03db1bba7d22a0..405afd5969a413 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll @@ -125,7 +125,7 @@ define i64 @load_after_store(i32 %a, ptr %b, ptr %p) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1> -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64 @@ -234,7 +234,7 @@ define i16 @debug_metadata_diassign(i1 %cond, i16 %a, ptr %p) { ; CHECK-LABEL: @debug_metadata_diassign( ; CHECK-NEXT: bb0: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1> -; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> , ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> splat (i16 7), ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]]) ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2 ; CHECK-NEXT: ret i16 [[SPEC_SELECT]] ; @@ -311,12 +311,12 @@ define void @threshold_6(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ; CHECK-LABEL: @threshold_6( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1> -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]]) -; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> , ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 2), ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 3), ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 4), ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 5), ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]]) +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 6), ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll b/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll index 00cee73da428c9..c80e22499775aa 100644 --- a/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll +++ b/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll @@ -10,16 +10,16 @@ define i32 @align_both_equal() local_unnamed_addr { ; CHECK-LABEL: @align_both_equal( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true @@ -64,16 +64,16 @@ define i32 @align_not_equal() local_unnamed_addr { ; CHECK-LABEL: @align_not_equal( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true @@ -118,16 +118,16 @@ define i32 @align_single_zero() local_unnamed_addr { ; CHECK-LABEL: @align_single_zero( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true @@ -172,16 +172,16 @@ define i32 @align_single_zero_second_greater_default() local_unnamed_addr { ; CHECK-LABEL: @align_single_zero_second_greater_default( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true @@ -226,16 +226,16 @@ define i32 @align_both_zero() local_unnamed_addr { ; CHECK-LABEL: @align_both_zero( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1) ; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @m, align 8 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]] ; CHECK-NEXT: [[AND4:%.*]] = and i64 [[TMP2]], 2 ; CHECK-NEXT: [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TOBOOL]], true ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll index 591e74c1341350..d85331f77b12f8 100644 --- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll +++ b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll @@ -105,13 +105,13 @@ define void @stride_is_minus_2s(i32 %b, i32 %s) { define void @stride_is_minus_2s_vec(<2 x i32> %b, <2 x i32> %s) { ; CHECK-LABEL: @stride_is_minus_2s_vec( -; CHECK-NEXT: [[S6:%.*]] = mul <2 x i32> [[S:%.*]], +; CHECK-NEXT: [[S6:%.*]] = mul <2 x i32> [[S:%.*]], splat (i32 6) ; CHECK-NEXT: [[T1:%.*]] = add <2 x i32> [[B:%.*]], [[S6]] ; CHECK-NEXT: call void @voo(<2 x i32> [[T1]]) -; CHECK-NEXT: [[S4:%.*]] = shl <2 x i32> [[S]], +; CHECK-NEXT: [[S4:%.*]] = shl <2 x i32> [[S]], splat (i32 2) ; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[B]], [[S4]] ; CHECK-NEXT: call void @voo(<2 x i32> [[T2]]) -; CHECK-NEXT: [[S2:%.*]] = shl <2 x i32> [[S]], +; CHECK-NEXT: [[S2:%.*]] = shl <2 x i32> [[S]], splat (i32 1) ; CHECK-NEXT: [[T3:%.*]] = add <2 x i32> [[B]], [[S2]] ; CHECK-NEXT: call void @voo(<2 x i32> [[T3]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll index 775ad4c5ecc363..ebd2c5bd2574b5 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll @@ -562,7 +562,7 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i8> [[TMP53]], <16 x i8> [[TMP54]], <16 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = sub nsw <16 x i32> [[TMP48]], [[TMP56]] -; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], +; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], splat (i32 16) ; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP40]] ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> @@ -589,9 +589,9 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP82:%.*]] = add nsw <16 x i32> [[TMP79]], [[TMP81]] ; CHECK-NEXT: [[TMP83:%.*]] = sub nsw <16 x i32> [[TMP78]], [[TMP80]] ; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <16 x i32> [[TMP82]], <16 x i32> [[TMP83]], <16 x i32> -; CHECK-NEXT: [[TMP85:%.*]] = lshr <16 x i32> [[TMP84]], -; CHECK-NEXT: [[TMP86:%.*]] = and <16 x i32> [[TMP85]], -; CHECK-NEXT: [[TMP87:%.*]] = mul nuw <16 x i32> [[TMP86]], +; CHECK-NEXT: [[TMP85:%.*]] = lshr <16 x i32> [[TMP84]], splat (i32 15) +; CHECK-NEXT: [[TMP86:%.*]] = and <16 x i32> [[TMP85]], splat (i32 65537) +; CHECK-NEXT: [[TMP87:%.*]] = mul nuw <16 x i32> [[TMP86]], splat (i32 65535) ; CHECK-NEXT: [[TMP88:%.*]] = add <16 x i32> [[TMP87]], [[TMP84]] ; CHECK-NEXT: [[TMP89:%.*]] = xor <16 x i32> [[TMP88]], [[TMP87]] ; CHECK-NEXT: [[TMP90:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP89]]) @@ -768,7 +768,7 @@ define i32 @full_reorder(ptr nocapture noundef readonly %pix1, i32 noundef %i_pi ; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], +; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16) ; CHECK-NEXT: [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]] ; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> [[TMP43]], <16 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> [[TMP43]], <16 x i32> @@ -795,9 +795,9 @@ define i32 @full_reorder(ptr nocapture noundef readonly %pix1, i32 noundef %i_pi ; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]] ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], -; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], -; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], +; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15) +; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537) +; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535) ; CHECK-NEXT: [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]] ; CHECK-NEXT: [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]] ; CHECK-NEXT: [[TMP74:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]]) diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll index 4216b0e643bb68..3c672efbb5a07a 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll @@ -25,7 +25,7 @@ define i32 @test_mask_or(<16 x i32> %a, ptr %b) { ; CHECK-LABEL: @test_mask_or( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1 -; CHECK-NEXT: [[A_MASKED:%.*]] = and <16 x i32> [[A:%.*]], +; CHECK-NEXT: [[A_MASKED:%.*]] = and <16 x i32> [[A:%.*]], splat (i32 16) ; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[A_MASKED]] to <16 x i8> ; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i8> [[WIDE_LOAD]], [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32> @@ -44,14 +44,14 @@ entry: define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) { ; CHECK-LABEL: @multiuse( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], -; CHECK-NEXT: [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], +; CHECK-NEXT: [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255) +; CHECK-NEXT: [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255) ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1 -; CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[V_MASKED]] to <16 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i8> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15) ; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[U_MASKED]] to <16 x i8> ; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32> diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 459ede173b841a..7472e1bc52bb8c 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -292,7 +292,7 @@ define <8 x i8> @undeflane(<8 x i8> %a, <8 x i8> %b) { define <8 x i8> @constantsplat(<8 x i8> %a) { ; CHECK-LABEL: @constantsplat( -; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], splat (i8 10) ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -339,7 +339,7 @@ define <8 x i8> @constantdiff2(<8 x i8> %a) { define <8 x half> @constantsplatf(<8 x half> %a) { ; CHECK-LABEL: @constantsplatf( -; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], splat (half 0xH4900) ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll index a22575ccb1ca21..46a622148c8718 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll @@ -28,7 +28,7 @@ define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) { define i32 @reduceshuffle_onein_const_v4i32(<4 x i32> %a) { ; CHECK-LABEL: @reduceshuffle_onein_const_v4i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -179,7 +179,7 @@ define i32 @reduceshuffle_twoin_extrashuffleuse_v4i32(<4 x i32> %a, <4 x i32> %b define i32 @reduceshuffle_twoin_extraotheruse_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_extraotheruse_v4i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]]) ; CHECK-NEXT: call void @use(<4 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] @@ -234,7 +234,7 @@ define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) { define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) { ; CHECK-LABEL: @reduceshuffle_onein_ext_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -247,7 +247,7 @@ define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) { define i32 @reduceshuffle_twoin_concat_v16i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_concat_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -260,7 +260,7 @@ define i32 @reduceshuffle_twoin_concat_v16i32(<8 x i32> %a, <8 x i32> %b) { define i32 @reduceshuffle_twoin_lowelt_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_lowelt_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -273,7 +273,7 @@ define i32 @reduceshuffle_twoin_lowelt_v16i32(<16 x i32> %a, <16 x i32> %b) { define i32 @reduceshuffle_twoin_notlowelt_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_notlowelt_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -286,7 +286,7 @@ define i32 @reduceshuffle_twoin_notlowelt_v16i32(<16 x i32> %a, <16 x i32> %b) { define i32 @reduceshuffle_twoin_uneven_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_uneven_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -299,9 +299,9 @@ define i32 @reduceshuffle_twoin_uneven_v16i32(<16 x i32> %a, <16 x i32> %b) { define i32 @reduceshuffle_twoin_shr1_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_shr1_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i32> [[S]], -; CHECK-NEXT: [[A2:%.*]] = and <16 x i32> [[A1]], -; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i32> [[A2]], +; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i32> [[S]], splat (i32 15) +; CHECK-NEXT: [[A2:%.*]] = and <16 x i32> [[A1]], splat (i32 65537) +; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i32> [[A2]], splat (i32 65535) ; CHECK-NEXT: [[A4:%.*]] = add <16 x i32> [[A3]], [[S]] ; CHECK-NEXT: [[A5:%.*]] = xor <16 x i32> [[A4]], [[A3]] ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[A5]]) @@ -320,9 +320,9 @@ define i32 @reduceshuffle_twoin_shr1_v16i32(<16 x i32> %a, <16 x i32> %b) { define i32 @reduceshuffle_twoin_shr2_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_shr2_v16i32( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i32> , [[S]] -; CHECK-NEXT: [[A2:%.*]] = and <16 x i32> [[A1]], -; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i32> [[A2]], +; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i32> splat (i32 15), [[S]] +; CHECK-NEXT: [[A2:%.*]] = and <16 x i32> [[A1]], splat (i32 65537) +; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i32> [[A2]], splat (i32 65535) ; CHECK-NEXT: [[A4:%.*]] = add <16 x i32> [[A3]], [[S]] ; CHECK-NEXT: [[A5:%.*]] = xor <16 x i32> [[A4]], [[A3]] ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[A5]]) @@ -365,7 +365,7 @@ define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) { define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) { ; CHECK-LABEL: @reduceshuffle_onein_ext_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -378,7 +378,7 @@ define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) { define i16 @reduceshuffle_twoin_concat_v16i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_concat_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -391,7 +391,7 @@ define i16 @reduceshuffle_twoin_concat_v16i16(<8 x i16> %a, <8 x i16> %b) { define i16 @reduceshuffle_twoin_lowelt_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_lowelt_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -404,7 +404,7 @@ define i16 @reduceshuffle_twoin_lowelt_v16i16(<16 x i16> %a, <16 x i16> %b) { define i16 @reduceshuffle_twoin_notlowelt_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_notlowelt_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -417,7 +417,7 @@ define i16 @reduceshuffle_twoin_notlowelt_v16i16(<16 x i16> %a, <16 x i16> %b) { define i16 @reduceshuffle_twoin_uneven_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_uneven_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; @@ -430,9 +430,9 @@ define i16 @reduceshuffle_twoin_uneven_v16i16(<16 x i16> %a, <16 x i16> %b) { define i16 @reduceshuffle_twoin_ext_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_ext_v16i16( ; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i16> [[S]], -; CHECK-NEXT: [[A2:%.*]] = and <16 x i16> [[A1]], -; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i16> [[A2]], +; CHECK-NEXT: [[A1:%.*]] = lshr <16 x i16> [[S]], splat (i16 7) +; CHECK-NEXT: [[A2:%.*]] = and <16 x i16> [[A1]], splat (i16 257) +; CHECK-NEXT: [[A3:%.*]] = mul nuw <16 x i16> [[A2]], splat (i16 255) ; CHECK-NEXT: [[A4:%.*]] = add <16 x i16> [[A3]], [[S]] ; CHECK-NEXT: [[A5:%.*]] = xor <16 x i16> [[A4]], [[A3]] ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[A5]]) diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization-shufflevector-splat.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization-shufflevector-splat.ll index 80602d12e26f8a..1736ce46c1a778 100644 --- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization-shufflevector-splat.ll +++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization-shufflevector-splat.ll @@ -7,7 +7,7 @@ define <4 x i64> @add_v4i64_allonesmask(<4 x i64> %x) { ; CHECK-LABEL: define <4 x i64> @add_v4i64_allonesmask( ; CHECK-SAME: <4 x i64> [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[X]], <4 x i64> zeroinitializer, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <4 x i1> , i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <4 x i1> splat (i1 true), i32 0) ; CHECK-NEXT: ret <4 x i64> [[TMP2]] ; %1 = shufflevector <4 x i64> %x, <4 x i64> zeroinitializer, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll index 97608174b524d7..28d6b59e2b7f02 100644 --- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll @@ -1617,7 +1617,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl) ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> , <1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 42), <1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP3]], <1 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <1 x i64> [[TMP4]] ; @@ -1626,7 +1626,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl) ; VEC-COMBINE-32-NEXT: [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer ; VEC-COMBINE-32-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[Y:%.*]], i64 0 ; VEC-COMBINE-32-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer -; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> , <1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-32-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 42), <1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-32-NEXT: [[TMP4:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP3]], <1 x i1> [[MASK]], i32 [[EVL]]) ; VEC-COMBINE-32-NEXT: ret <1 x i64> [[TMP4]] ; @@ -1643,7 +1643,7 @@ define <1 x i64> @add_v1i64_anymask(<1 x i64> %x, i64 %y, <1 x i1> %mask, i32 ze ; ALL-LABEL: @add_v1i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> , <1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 42), <1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP3]], <1 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <1 x i64> [[TMP4]] ; @@ -1669,7 +1669,7 @@ define <4 x i64> @add_v4i64_allonesmask(<4 x i64> %x, i64 %y, i32 zeroext %evl) ; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <4 x i1> [[SPLAT]], <4 x i1> poison, <4 x i32> zeroinitializer ; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[Y:%.*]], i64 0 ; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> , <4 x i1> [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> splat (i64 42), <4 x i1> [[MASK]], i32 [[EVL:%.*]]) ; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> [[X:%.*]], <4 x i64> [[TMP3]], <4 x i1> [[MASK]], i32 [[EVL]]) ; NO-VEC-COMBINE-NEXT: ret <4 x i64> [[TMP4]] ; @@ -1686,7 +1686,7 @@ define <4 x i64> @add_v4i64_anymask(<4 x i64> %x, i64 %y, <4 x i1> %mask, i32 ze ; ALL-LABEL: @add_v4i64_anymask( ; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[Y:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> , <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) +; ALL-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> splat (i64 42), <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]]) ; ALL-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> [[X:%.*]], <4 x i64> [[TMP3]], <4 x i1> [[MASK]], i32 [[EVL]]) ; ALL-NEXT: ret <4 x i64> [[TMP4]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll index 21adcdfd29b826..bbdd76c58b58ec 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -514,7 +514,7 @@ define <2 x i64> @or_constant(i64 %x) { define <2 x i64> @or_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @or_constant_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = or i64 [[X:%.*]], -42 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -558,7 +558,7 @@ define <2 x double> @fadd_constant(double %x) { define <2 x double> @fadd_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fadd_constant_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], -4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -580,7 +580,7 @@ define <2 x double> @fsub_constant_op0(double %x) { define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub nsz double -4.200000e+01, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -602,7 +602,7 @@ define <2 x double> @fsub_constant_op1(double %x) { define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op1_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -624,7 +624,7 @@ define <2 x double> @fmul_constant(double %x) { define <2 x double> @fmul_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fmul_constant_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul double [[X:%.*]], -4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -646,7 +646,7 @@ define <2 x double> @fdiv_constant_op0(double %x) { define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -668,7 +668,7 @@ define <2 x double> @fdiv_constant_op1(double %x) { define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -690,7 +690,7 @@ define <2 x double> @frem_constant_op0(double %x) { define <2 x double> @frem_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem double -4.200000e+01, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -712,7 +712,7 @@ define <2 x double> @frem_constant_op1(double %x) { define <2 x double> @frem_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op1_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem nnan double [[X:%.*]], 4.200000e+01 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll index e9ab99c3e0d9c4..edd92c3f1c14c0 100644 --- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll +++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll @@ -52,7 +52,7 @@ define <2 x i1> @ins1_ins1_i64(i64 %x, i64 %y) { define <2 x i1> @ins0_ins0_f64(double %x, double %y) { ; CHECK-LABEL: @ins0_ins0_f64( ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ninf uge double [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> , i1 [[R_SCALAR]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i1> [[R]] ; %i0 = insertelement <2 x double> undef, double %x, i32 0 @@ -157,7 +157,7 @@ define <2 x i1> @constant_op1_i64(i64 %x) { define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) { ; CHECK-LABEL: @constant_op1_i64_not_undef_lane( ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sge i64 [[X:%.*]], 42 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> , i1 [[R_SCALAR]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i1> [[R]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -194,7 +194,7 @@ define <4 x i1> @constant_op0_i32(i32 %x) { define <4 x i1> @constant_op0_i32_not_undef_lane(i32 %x) { ; CHECK-LABEL: @constant_op0_i32_not_undef_lane( ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ule i32 42, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> , i1 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <4 x i1> [[R]] ; %ins = insertelement <4 x i32> undef, i32 %x, i32 1 @@ -216,7 +216,7 @@ define <2 x i1> @constant_op0_f64(double %x) { define <2 x i1> @constant_op0_f64_not_undef_lane(double %x) { ; CHECK-LABEL: @constant_op0_f64_not_undef_lane( ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ueq double -4.200000e+01, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> , i1 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i1> [[R]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -238,7 +238,7 @@ define <2 x i1> @constant_op1_f64(double %x) { define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) { ; CHECK-LABEL: @constant_op1_f32_not_undef_lane( ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01 -; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> , i1 [[R_SCALAR]], i64 0 +; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0 ; CHECK-NEXT: ret <4 x i1> [[R]] ; %ins = insertelement <4 x float> undef, float %x, i32 0 diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll index 74a58c8d313611..4c70438b2d6e4e 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll @@ -126,7 +126,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8> -; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], +; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1) ; SSE-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> ; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -135,7 +135,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; AVX-LABEL: @PR35454_1( ; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> -; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], +; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1) ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -156,7 +156,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16> -; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], +; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1) ; SSE-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> ; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -165,7 +165,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; AVX-LABEL: @PR35454_2( ; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], +; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1) ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll index c423053a9a4839..77b44d0e40e144 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll @@ -220,7 +220,7 @@ define <4 x i32> @shuf_srem_v4i32_poison(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: define <4 x i32> @shuf_srem_v4i32_poison( ; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[SREM0:%.*]] = srem <4 x i32> [[A1]], [[A0]] -; CHECK-NEXT: [[SREM1:%.*]] = srem <4 x i32> , [[A0]] +; CHECK-NEXT: [[SREM1:%.*]] = srem <4 x i32> splat (i32 1), [[A0]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[SREM0]], <4 x i32> [[SREM1]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index c8c9aa161ae289..83e088aaa4e27c 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -185,7 +185,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8> -; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], +; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1) ; SSE-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> ; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -195,7 +195,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { ; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> -; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], +; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1) ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -217,7 +217,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16> -; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], +; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1) ; SSE-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> ; SSE-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> @@ -227,7 +227,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { ; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <8 x i16> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], +; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1) ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> ; AVX-NEXT: [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> ; AVX-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> diff --git a/llvm/test/tools/llvm-reduce/reduce-opcodes.ll b/llvm/test/tools/llvm-reduce/reduce-opcodes.ll index 3156d53f70f17d..d154b15b610d04 100644 --- a/llvm/test/tools/llvm-reduce/reduce-opcodes.ll +++ b/llvm/test/tools/llvm-reduce/reduce-opcodes.ll @@ -179,7 +179,7 @@ define float @sqrt_ninf(float %a, float %b) { } ; CHECK-LABEL: @sqrt_vec( -; RESULT-NEXT: %op = fmul <2 x float> %a, , !dbg !7 +; RESULT-NEXT: %op = fmul <2 x float> %a, splat (float 2.000000e+00), !dbg !7 ; RESULT-NEXT: ret define <2 x float> @sqrt_vec(<2 x float> %a, <2 x float> %b) { %op = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a), !dbg !7 diff --git a/llvm/test/tools/llvm-reduce/reduce-operands-fp.ll b/llvm/test/tools/llvm-reduce/reduce-operands-fp.ll index 2ceebd7a5a86ac..b547c819bf0de9 100644 --- a/llvm/test/tools/llvm-reduce/reduce-operands-fp.ll +++ b/llvm/test/tools/llvm-reduce/reduce-operands-fp.ll @@ -36,13 +36,13 @@ ; ONE: %fadd3 = fadd float 1.000000e+00, 1.000000e+00 ; ONE: %fadd4 = fadd float 1.000000e+00, 1.000000e+00 ; ONE: %fadd5 = fadd float 1.000000e+00, 1.000000e+00 -; ONE: %fadd6 = fadd <2 x float> %arg2, -; ONE: %fadd7 = fadd <2 x float> , -; ONE: %fadd8 = fadd <2 x float> , zeroinitializer -; ONE: %fadd9 = fadd <2 x float> , -; ONE: %fadd10 = fadd <2 x float> , -; ONE: %fadd11 = fadd <2 x float> , -; ONE: %fadd12 = fadd <2 x float> , +; ONE: %fadd6 = fadd <2 x float> %arg2, splat (float 1.000000e+00) +; ONE: %fadd7 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) +; ONE: %fadd8 = fadd <2 x float> splat (float 1.000000e+00), zeroinitializer +; ONE: %fadd9 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) +; ONE: %fadd10 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) +; ONE: %fadd11 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) +; ONE: %fadd12 = fadd <2 x float> splat (float 1.000000e+00), splat (float 1.000000e+00) ; ZERO: %fadd0 = fadd float %arg0, 0.000000e+00 @@ -66,13 +66,13 @@ ; NAN: %fadd3 = fadd float 0x7FF8000000000000, 1.000000e+00 ; NAN: %fadd4 = fadd float 0x7FF8000000000000, 0x7FF8000000000000 ; NAN: %fadd5 = fadd float 0x7FF8000000000000, 0x7FF8000000000000 -; NAN: %fadd6 = fadd <2 x float> %arg2, -; NAN: %fadd7 = fadd <2 x float> , -; NAN: %fadd8 = fadd <2 x float> , zeroinitializer -; NAN: %fadd9 = fadd <2 x float> , -; NAN: %fadd10 = fadd <2 x float> , -; NAN: %fadd11 = fadd <2 x float> , -; NAN: %fadd12 = fadd <2 x float> , +; NAN: %fadd6 = fadd <2 x float> %arg2, splat (float 0x7FF8000000000000) +; NAN: %fadd7 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 0x7FF8000000000000) +; NAN: %fadd8 = fadd <2 x float> splat (float 0x7FF8000000000000), zeroinitializer +; NAN: %fadd9 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 1.000000e+00) +; NAN: %fadd10 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 0x7FF8000000000000) +; NAN: %fadd11 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 0x7FF8000000000000) +; NAN: %fadd12 = fadd <2 x float> splat (float 0x7FF8000000000000), splat (float 0x7FF8000000000000) define void @foo(float %arg0, float %arg1, <2 x float> %arg2, <2 x float> %arg3) { bb0: diff --git a/llvm/test/tools/llvm-reduce/reduce-operands-int.ll b/llvm/test/tools/llvm-reduce/reduce-operands-int.ll index 3005058742ce31..397a1595ca6b2c 100644 --- a/llvm/test/tools/llvm-reduce/reduce-operands-int.ll +++ b/llvm/test/tools/llvm-reduce/reduce-operands-int.ll @@ -30,12 +30,12 @@ ; ONE: %add2 = add i32 1, 0 ; ONE: %add3 = add i32 1, 1 ; ONE: %add4 = add i32 1, 1 -; ONE: %add5 = add <2 x i32> %arg2, -; ONE: %add6 = add <2 x i32> , -; ONE: %add7 = add <2 x i32> , zeroinitializer -; ONE: %add8 = add <2 x i32> , -; ONE: %add9 = add <2 x i32> , -; ONE: %add10 = add <2 x i32> , +; ONE: %add5 = add <2 x i32> %arg2, splat (i32 1) +; ONE: %add6 = add <2 x i32> splat (i32 1), splat (i32 1) +; ONE: %add7 = add <2 x i32> splat (i32 1), zeroinitializer +; ONE: %add8 = add <2 x i32> splat (i32 1), splat (i32 1) +; ONE: %add9 = add <2 x i32> splat (i32 1), splat (i32 1) +; ONE: %add10 = add <2 x i32> splat (i32 1), splat (i32 1) ; ZERO: %add0 = add i32 %arg0, 0 diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index c884f83cb4d32d..11d73ea7c84ad2 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -95,31 +95,31 @@ llvm.mlir.global internal constant @int_gep() : !llvm.ptr { // CHECK{LITERAL}: @dense_float_vector = internal global <3 x float> llvm.mlir.global internal @dense_float_vector(dense<[1.0, 2.0, 3.0]> : vector<3xf32>) : vector<3xf32> -// CHECK{LITERAL}: @splat_float_vector = internal global <3 x float> +// CHECK{LITERAL}: @splat_float_vector = internal global <3 x float> splat (float 4.200000e+01) llvm.mlir.global internal @splat_float_vector(dense<42.0> : vector<3xf32>) : vector<3xf32> // CHECK{LITERAL}: @dense_double_vector = internal global <3 x double> llvm.mlir.global internal @dense_double_vector(dense<[1.0, 2.0, 3.0]> : vector<3xf64>) : vector<3xf64> -// CHECK{LITERAL}: @splat_double_vector = internal global <3 x double> +// CHECK{LITERAL}: @splat_double_vector = internal global <3 x double> splat (double 4.200000e+01) llvm.mlir.global internal @splat_double_vector(dense<42.0> : vector<3xf64>) : vector<3xf64> // CHECK{LITERAL}: @dense_i64_vector = internal global <3 x i64> llvm.mlir.global internal @dense_i64_vector(dense<[1, 2, 3]> : vector<3xi64>) : vector<3xi64> -// CHECK{LITERAL}: @splat_i64_vector = internal global <3 x i64> +// CHECK{LITERAL}: @splat_i64_vector = internal global <3 x i64> splat (i64 42) llvm.mlir.global internal @splat_i64_vector(dense<42> : vector<3xi64>) : vector<3xi64> // CHECK{LITERAL}: @dense_float_vector_2d = internal global [2 x <2 x float>] [<2 x float> , <2 x float> ] llvm.mlir.global internal @dense_float_vector_2d(dense<[[1.0, 2.0], [3.0, 4.0]]> : vector<2x2xf32>) : !llvm.array<2 x vector<2xf32>> -// CHECK{LITERAL}: @splat_float_vector_2d = internal global [2 x <2 x float>] [<2 x float> , <2 x float> ] +// CHECK{LITERAL}: @splat_float_vector_2d = internal global [2 x <2 x float>] [<2 x float> splat (float 4.200000e+01), <2 x float> splat (float 4.200000e+01)] llvm.mlir.global internal @splat_float_vector_2d(dense<42.0> : vector<2x2xf32>) : !llvm.array<2 x vector<2xf32>> // CHECK{LITERAL}: @dense_float_vector_3d = internal global [2 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ], [2 x <2 x float>] [<2 x float> , <2 x float> ]] llvm.mlir.global internal @dense_float_vector_3d(dense<[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]> : vector<2x2x2xf32>) : !llvm.array<2 x !llvm.array<2 x vector<2xf32>>> -// CHECK{LITERAL}: @splat_float_vector_3d = internal global [2 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> , <2 x float> ], [2 x <2 x float>] [<2 x float> , <2 x float> ]] +// CHECK{LITERAL}: @splat_float_vector_3d = internal global [2 x [2 x <2 x float>]] [[2 x <2 x float>] [<2 x float> splat (float 4.200000e+01), <2 x float> splat (float 4.200000e+01)], [2 x <2 x float>] [<2 x float> splat (float 4.200000e+01), <2 x float> splat (float 4.200000e+01)]] llvm.mlir.global internal @splat_float_vector_3d(dense<42.0> : vector<2x2x2xf32>) : !llvm.array<2 x !llvm.array<2 x vector<2xf32>>> // @@ -929,7 +929,7 @@ llvm.func @multireturn_caller() { // CHECK-LABEL: define <4 x float> @vector_ops(<4 x float> {{%.*}}, <4 x i1> {{%.*}}, <4 x i64> {{%.*}}) llvm.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>) -> vector<4xf32> { %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : vector<4xf32> -// CHECK-NEXT: %4 = fadd <4 x float> %0, +// CHECK-NEXT: %4 = fadd <4 x float> %0, splat (float 4.200000e+01) %1 = llvm.fadd %arg0, %0 : vector<4xf32> // CHECK-NEXT: %5 = select <4 x i1> %1, <4 x float> %4, <4 x float> %0 %2 = llvm.select %arg1, %1, %arg0 : vector<4xi1>, vector<4xf32> @@ -941,9 +941,9 @@ llvm.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4 %5 = llvm.srem %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %9 = urem <4 x i64> %2, %2 %6 = llvm.urem %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %10 = fdiv <4 x float> %0, +// CHECK-NEXT: %10 = fdiv <4 x float> %0, splat (float 4.200000e+01) %7 = llvm.fdiv %arg0, %0 : vector<4xf32> -// CHECK-NEXT: %11 = frem <4 x float> %0, +// CHECK-NEXT: %11 = frem <4 x float> %0, splat (float 4.200000e+01) %8 = llvm.frem %arg0, %0 : vector<4xf32> // CHECK-NEXT: %12 = and <4 x i64> %2, %2 %9 = llvm.and %arg2, %arg2 : vector<4xi64> @@ -991,7 +991,7 @@ llvm.func @vector_splat_3d() -> !llvm.array<4 x array<16 x vector<4 x f32>>> { // CHECK-LABEL: @vector_splat_nonzero llvm.func @vector_splat_nonzero() -> vector<4xf32> { - // CHECK: ret <4 x float> + // CHECK: ret <4 x float> splat (float 1.000000e+00) %0 = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> llvm.return %0 : vector<4xf32> } @@ -1336,7 +1336,7 @@ llvm.func @structconstant() -> !llvm.struct<(i32, f32)> { // CHECK-LABEL: @indexconstantsplat llvm.func @indexconstantsplat() -> vector<3xi32> { %1 = llvm.mlir.constant(dense<42> : vector<3xindex>) : vector<3xi32> - // CHECK: ret <3 x i32> + // CHECK: ret <3 x i32> splat (i32 42) llvm.return %1 : vector<3xi32> }